From d8a88d4b598384e647fe3b76e97ee1d97cdc22ae Mon Sep 17 00:00:00 2001 From: shouhuanxiaoji <345865759@163.com> Date: Tue, 12 Aug 2025 13:10:29 +0800 Subject: [PATCH 1/6] From separated components building to all-in-one llvm-projects building --- ...-18-Always-build-shared-libs-for-LLD.patch | 29 + ...Use-the-ClangBuiltin-class-to-automa.patch | 670 + ...uctions-not-recorded-in-ErasedInstrs.patch | 287 - ...oolset-path-precedence-over-Installe.patch | 41 + ...-Support-mcmodel-for-LoongArch-72514.patch | 110 + ...d-substitutions-for-record-types-whe.patch | 121 + ...h-Add-support-for-OpenCloudOS-triple.patch | 26 + ...Improve-codegen-for-atomic-ops-67391.patch | 1449 + ...-funwind-tables-the-default-on-all-a.patch | 27 + 0001-Workaround-a-bug-in-ORC-on-ppc64le.patch | 30 + ...Add-symbol-versioning-to-all-symbols.patch | 39 + 0001-flang-Remove-the-dependency-on-Bye.patch | 47 + ...pport-the-R_LARCH_-ADD-SUB-6-relocat.patch | 77 + ...-option-to-disable-tsan-tests-111548.patch | 62 + ...-vaddr-for-__llvm_write_binary_ids-n.patch | 86 + ...er-msan-VarArgHelper-for-loongarch64.patch | 153 + ...ngArch-Define-psABI-v2.20-relocs-for.patch | 63 + ...sanitize-cfi-icall-on-loongarch64-67.patch | 41 + ...-LoongArch-Add-LSX-intrinsic-support.patch | 2725 + ...lax-feature-and-keep-relocations-721.patch | 1 - ...oongArch-Add-some-atomic-tests-68766.patch | 4473 +- ...LoongArch-Add-LASX-intrinsic-support.patch | 2240 + ...delayed-decision-for-ADD-SUB-relocat.patch | 1 - ...t-finer-grained-DBAR-hints-for-LA664.patch | 839 + ...dd-loongarch64-to-CFI-jumptables-673.patch | 127 + ...clang-Don-t-install-static-libraries.patch | 25 + ...pport-the-R_LARCH_CALL36-relocation-.patch | 136 + 0004-CFI-Allow-LoongArch-67314.patch | 100 + ...oongArch-Add-LSX-intrinsic-testcases.patch | 9905 +++ ..._LARCH_RELAX-when-expanding-some-Loa.patch | 1 - ...mit-a-test-for-atomic-cmpxchg-optmiz.patch | 651 + ...ch-Remove-the-test-for-R_LARCH_CALL3.patch | 36 + ...ongArch-Add-LASX-intrinsic-testcases.patch | 10197 +++ ...e-codegen-for-atomic-cmpxchg-ops-693.patch | 204 + ...-AlignFragment-size-if-layout-is-ava.patch | 1 - ...LoongArch-Remove-the-test-for-R_LARC.patch | 47 + ...-Mark-several-tests-as-UNSUPPORTED-o.patch | 69 + ...stcases-of-LASX-intrinsics-with-imme.patch | 5780 ++ ...unction-call-code-sequence-as-PCADDU.patch | 442 + ...de-LoongArchTargetLowering-getExtend.patch | 1142 + ...Support-R_LARCH_-ADD-SUB-_ULEB128-R_.patch | 1 - ...-for-linux-loongarch64-in-lib-tsan-g.patch | 106 + ...laxDwarfLineAddr-and-relaxDwarfCFA-t.patch | 1 - ...-invalid-immediate-testcases-for-LSX.patch | 1220 + ...gArch-Pre-commit-test-for-76555.-NFC.patch | 192 + ...ear_cache-in-InvalidateInstructionCa.patch | 35 + ...n-Refine-fstat-64-interceptors-86625.patch | 112 + ...Generate-_mcount-instead-of-mcount-6.patch | 61 + ...-nops-and-emit-align-reloc-when-hand.patch | 1 - ...-invalid-immediate-testcases-for-LAS.patch | 1220 + ...emove-the-FIXME-in-psabi-restricted-.patch | 29 + ...mplement-emit-relocs-with-relaxation.patch | 238 + ...stcases-of-LSX-intrinsics-with-immed.patch | 5694 ++ ...td-pattern-for-CACOP-LDPTE-and-LDDIR.patch | 43 + ...ement-the-expansion-of-PseudoLA-_LAR.patch | 1278 + ...nused-variable-in-LoongArchExpandPse.patch | 38 + ...eorder-LoongArchTargetLowering-.-NFC.patch | 280 + ...dd-some-ABI-regression-tests-for-emp.patch | 86 + ...LF-Support-relax-R_LARCH_ALIGN-78692.patch | 562 + ...Add-the-support-for-vector-in-llvm17.patch | 56520 ---------------- ...I-mismatch-with-gcc-g-about-empty-st.patch | 85 + 
0011-LoongArch-Fix-typos.-NFC.patch | 300 + ...rner-testcase-for-elf-getLoongArchPa.patch | 44 + ...est-Update-dwarf-loongarch-relocs.ll.patch | 1 - ...e-support-for-compiler-rt-and-bugfix.patch | 2474 - ...Arch-Pre-commit-test-for-issue-70890.patch | 46 + ...me-operations-action-for-LSX-and-LAS.patch | 138 + ...d-a-another-corner-testcase-for-elf-.patch | 53 + ...pport-the-R_LARCH_-ADD-SUB-_ULEB128-.patch | 198 + ...Support-compiler-options-mlsx-mlasx-.patch | 350 + ...I-mismatch-with-g-when-handling-empt.patch | 68 + ...LF-uleb-ehtable.s-Mach-O-to-use-priv.patch | 1 - ...ndle-extreme-code-model-relocs-accor.patch | 443 + ...Add-ABI-implementation-of-passing-ve.patch | 33 + ...oongArch-to-fno-direct-access-extern.patch | 64 + ...Add-support-for-option-msimd-and-mac.patch | 269 + ...oFoldSymbolOffsetDifference-revert-i.patch | 1 - ...Support-the-builtin-functions-for-LS.patch | 5093 ++ ...ine-MCInstrAnalysis-based-on-registe.patch | 240 + ...mit-test-case-to-show-bug-in-LoongAr.patch | 36 - ...Modify-loongarch-msimd.c-to-avoid-gr.patch | 154 + ...Support-the-builtin-functions-for-LA.patch | 5142 ++ ...n-Implement-128-bit-and-256-bit-vect.patch | 3112 +- ...e-commit-MCInstrAnalysis-tests-for-i.patch | 89 + ...ptLevel-to-LoongArchDAGToDAGISel-cor.patch | 131 - ...ch-CodeGen-Add-LSX-builtin-testcases.patch | 12430 ++++ ...le-128-bits-vector-by-default-100056.patch | 284 + ...ch-Fix-test-cases-after-2dd8460d8a36.patch | 60 - ...Barrier-to-true-for-instruction-b-72.patch | 57 + ...ongArch-V1.1-instructions-definition.patch | 432 + ...h-CodeGen-Add-LASX-builtin-testcases.patch | 11692 ++++ ...-commit-tests-for-instr-bl-fixupkind.patch | 92 + ...degen-support-for-extractelement-737.patch | 516 + ...finitions-and-feature-frecipe-for-FP.patch | 876 + ...port-to-get-the-FixupKind-for-BL-729.patch | 53 + ...me-binary-IR-instructions-testcases-.patch | 1526 + ...ify-branch-evaluation-for-MCInstrAna.patch | 134 + ...t-march-la64v1.0-and-march-la64v1.1-.patch | 240 + ...dd-codegen-support-for-insertelement.patch | 700 + ...mit-a-test-for-smul-with-overflow-NF.patch | 139 + 0021-LoongArch-Support-la664-100068.patch | 182 + ...rch-Custom-lowering-ISD-BUILD_VECTOR.patch | 1301 + ...e-mulodi4-and-muloti4-libcalls-73199.patch | 528 + ...h-Fix-test-issue-of-init-loongarch.c.patch | 26 + ...re-and-or-xor-patterns-for-vector-ty.patch | 896 + ...ttern-for-FNMSUB_-S-D-instructions-7.patch | 732 + ...-experimental-auto-vec-feature.-1000.patch | 58 + ...me-binary-IR-instructions-testcases-.patch | 1522 + ...ngArch-Fix-the-procossor-series-mask.patch | 30 + ...ure-that-the-LoongArchISD-BSTRINS-no.patch | 59 + ...de-TargetLowering-isShuffleMaskLegal.patch | 115 + ...Precommit-test-for-fix-wrong-return-.patch | 108 + ...LoongArch-Support-CTLZ-with-lsx-lasx.patch | 389 + ...Fix-wrong-return-value-type-of-__ioc.patch | 72 + ...ch-Support-MULHS-MULHU-with-lsx-lasx.patch | 408 + ...SD-VSELECT-a-legal-operation-with-ls.patch | 273 + ...degen-support-for-icmp-fcmp-with-lsx.patch | 3596 + ...SD-FSQRT-a-legal-operation-with-lsx-.patch | 373 + 0031-LoongArch-Mark-ISD-FNEG-as-legal.patch | 140 + ...degen-support-for-X-VF-MSUB-NMADD-NM.patch | 3341 + ...Arch-Fix-LASX-vector_extract-codegen.patch | 328 + ...correct-pattern-XVREPL128VEI_-W-D-in.patch | 60 + ...correct-pattern-X-VBITSELI_B-instruc.patch | 88 + ...Do-not-pass-vector-arguments-via-vec.patch | 32009 +++++++++ ...-auto-vectorization-using-LSX-LASX-w.patch | 188 + ...h-Set-SINT_TO_FP-UINT_TO_FP-to-legal.patch | 298 + ...h-Set-FP_TO_SINT-FP_TO_UINT-to-legal.patch | 306 + 
...-the-incorrect-return-value-of-Loong.patch | 52 + ...gArch-Pre-commit-test-for-76913.-NFC.patch | 71 + ...ent-LoongArchRegisterInfo-canRealign.patch | 150 + CVE-2023-46049.patch | 34 - deprecated-recommonmark.patch | 25 - lit.lld-test.cfg.py | 12 + llvm.spec | 2225 +- run-lit-tests | 66 + sources | 4 +- 136 files changed, 140809 insertions(+), 67269 deletions(-) create mode 100644 0001-18-Always-build-shared-libs-for-LLD.patch create mode 100644 0001-Clang-LoongArch-Use-the-ClangBuiltin-class-to-automa.patch delete mode 100644 0001-Clear-instructions-not-recorded-in-ErasedInstrs.patch create mode 100644 0001-Driver-Give-devtoolset-path-precedence-over-Installe.patch create mode 100644 0001-Driver-Support-mcmodel-for-LoongArch-72514.patch create mode 100644 0001-ItaniumMangle-Add-substitutions-for-record-types-whe.patch create mode 100644 0001-LoongArch-Add-support-for-OpenCloudOS-triple.patch create mode 100644 0001-LoongArch-Improve-codegen-for-atomic-ops-67391.patch create mode 100644 0001-PATCH-clang-Make-funwind-tables-the-default-on-all-a.patch create mode 100644 0001-Workaround-a-bug-in-ORC-on-ppc64le.patch create mode 100644 0001-clang-shlib-Add-symbol-versioning-to-all-symbols.patch create mode 100644 0001-flang-Remove-the-dependency-on-Bye.patch create mode 100644 0001-lld-LoongArch-Support-the-R_LARCH_-ADD-SUB-6-relocat.patch create mode 100644 0001-openmp-Add-option-to-disable-tsan-tests-111548.patch create mode 100644 0001-profile-Use-base-vaddr-for-__llvm_write_binary_ids-n.patch create mode 100644 0001-sanitizer-msan-VarArgHelper-for-loongarch64.patch create mode 100644 0002-BinaryFormat-LoongArch-Define-psABI-v2.20-relocs-for.patch create mode 100644 0002-Driver-Support-fsanitize-cfi-icall-on-loongarch64-67.patch create mode 100644 0002-LoongArch-Add-LSX-intrinsic-support.patch rename 0001-Backport-LoongArch-Add-relax-feature-and-keep-relocations.patch => 0002-LoongArch-Add-relax-feature-and-keep-relocations-721.patch (99%) rename 0013-Backport-LoongArch-Improve-the-support-for-atomic-and-clear_cache.patch => 0002-LoongArch-Add-some-atomic-tests-68766.patch (62%) create mode 100644 0003-LoongArch-Add-LASX-intrinsic-support.patch rename 0002-Backport-LoongArch-Allow-delayed-decision-for-ADD-SUB-relocations.patch => 0003-LoongArch-Allow-delayed-decision-for-ADD-SUB-relocat.patch (99%) create mode 100644 0003-LoongArch-Support-finer-grained-DBAR-hints-for-LA664.patch create mode 100644 0003-LowerTypeTests-Add-loongarch64-to-CFI-jumptables-673.patch create mode 100644 0003-PATCH-clang-Don-t-install-static-libraries.patch create mode 100644 0003-lld-LoongArch-Support-the-R_LARCH_CALL36-relocation-.patch create mode 100644 0004-CFI-Allow-LoongArch-67314.patch create mode 100644 0004-LoongArch-Add-LSX-intrinsic-testcases.patch rename 0003-Backport-LoongArch-Emit-R_LARCH_RELAX-when-expanding-some-LoadAddress.patch => 0004-LoongArch-Emit-R_LARCH_RELAX-when-expanding-some-Loa.patch (99%) create mode 100644 0004-LoongArch-Precommit-a-test-for-atomic-cmpxchg-optmiz.patch create mode 100644 0004-lld-test-LoongArch-Remove-the-test-for-R_LARCH_CALL3.patch create mode 100644 0005-LoongArch-Add-LASX-intrinsic-testcases.patch create mode 100644 0005-LoongArch-Improve-codegen-for-atomic-cmpxchg-ops-693.patch rename 0004-Backport-MC-LoongArch-Add-AlignFragment-size-if-layout-is-available-and-not-need-insert-nops.patch => 0005-MC-LoongArch-Add-AlignFragment-size-if-layout-is-ava.patch (98%) create mode 100644 0005-Revert-lld-test-LoongArch-Remove-the-test-for-R_LARC.patch create mode 100644 
0005-test-compiler-rt-Mark-several-tests-as-UNSUPPORTED-o.patch create mode 100644 0006-LoongArch-Add-testcases-of-LASX-intrinsics-with-imme.patch create mode 100644 0006-LoongArch-Emit-function-call-code-sequence-as-PCADDU.patch create mode 100644 0006-LoongArch-Override-LoongArchTargetLowering-getExtend.patch rename 0005-Backport-LoongArch-RISCV-Support-R_LARCH_-ADD-SUB-_ULEB128-R_RISCV_-SET-SUB-_ULEB128-for-uleb128-directives.patch => 0006-LoongArch-RISCV-Support-R_LARCH_-ADD-SUB-_ULEB128-R_.patch (99%) create mode 100644 0006-tsan-Add-support-for-linux-loongarch64-in-lib-tsan-g.patch rename 0006-Backport-LoongArch-Add-relaxDwarfLineAddr-and-relaxDwarfCFA-to-handle-the-mutable-label-diff-in-dwarfinfo.patch => 0007-LoongArch-Add-relaxDwarfLineAddr-and-relaxDwarfCFA-t.patch (99%) create mode 100644 0007-LoongArch-MC-Add-invalid-immediate-testcases-for-LSX.patch create mode 100644 0007-LoongArch-Pre-commit-test-for-76555.-NFC.patch create mode 100644 0007-Memory-Call-__clear_cache-in-InvalidateInstructionCa.patch create mode 100644 0007-tsan-Refine-fstat-64-interceptors-86625.patch create mode 100644 0008-Clang-LoongArch-Generate-_mcount-instead-of-mcount-6.patch rename 0007-Backport-LoongArch-Insert-nops-and-emit-align-reloc-when-handle-alignment-directive.patch => 0008-LoongArch-Insert-nops-and-emit-align-reloc-when-hand.patch (99%) create mode 100644 0008-LoongArch-MC-Add-invalid-immediate-testcases-for-LAS.patch create mode 100644 0008-LoongArch-test-Remove-the-FIXME-in-psabi-restricted-.patch create mode 100644 0009-ELF-RISCV-Implement-emit-relocs-with-relaxation.patch create mode 100644 0009-LoongArch-Add-testcases-of-LSX-intrinsics-with-immed.patch create mode 100644 0009-LoongArch-Fix-td-pattern-for-CACOP-LDPTE-and-LDDIR.patch create mode 100644 0009-LoongArch-Reimplement-the-expansion-of-PseudoLA-_LAR.patch create mode 100644 0010-LoongArch-Fix-Wunused-variable-in-LoongArchExpandPse.patch create mode 100644 0010-LoongArch-Reorder-LoongArchTargetLowering-.-NFC.patch create mode 100644 0010-LoongArch-test-Add-some-ABI-regression-tests-for-emp.patch create mode 100644 0010-lld-ELF-Support-relax-R_LARCH_ALIGN-78692.patch delete mode 100644 0011-Backport-LoongArch-Add-the-support-for-vector-in-llvm17.patch create mode 100644 0011-LoongArch-Fix-ABI-mismatch-with-gcc-g-about-empty-st.patch create mode 100644 0011-LoongArch-Fix-typos.-NFC.patch create mode 100644 0011-lld-ELF-Add-a-corner-testcase-for-elf-getLoongArchPa.patch rename 0008-Backport-test-Update-dwarf-loongarch-relocs.ll.patch => 0011-test-Update-dwarf-loongarch-relocs.ll.patch (98%) delete mode 100644 0012-Backport-LoongArch-improve-the-support-for-compiler-rt-and-bugfix.patch create mode 100644 0012-LoongArch-Pre-commit-test-for-issue-70890.patch create mode 100644 0012-LoongArch-Set-some-operations-action-for-LSX-and-LAS.patch create mode 100644 0012-lld-LoongArch-Add-a-another-corner-testcase-for-elf-.patch create mode 100644 0012-lld-LoongArch-Support-the-R_LARCH_-ADD-SUB-_ULEB128-.patch create mode 100644 0013-Clang-LoongArch-Support-compiler-options-mlsx-mlasx-.patch create mode 100644 0013-LoongArch-Fix-ABI-mismatch-with-g-when-handling-empt.patch rename 0009-Backport-MC-test-Change-ELF-uleb-ehtable.s-Mach-O-to-use-private-symbols-in-.uleb128-for-label-differences.patch => 0013-MC-test-Change-ELF-uleb-ehtable.s-Mach-O-to-use-priv.patch (97%) create mode 100644 0013-lld-LoongArch-Handle-extreme-code-model-relocs-accor.patch create mode 100644 0014-Clang-LoongArch-Add-ABI-implementation-of-passing-ve.patch create mode 
100644 0014-Driver-Default-LoongArch-to-fno-direct-access-extern.patch create mode 100644 0014-LoongArch-clang-Add-support-for-option-msimd-and-mac.patch rename 0010-Backport-Mips-MC-AttemptToFoldSymbolOffsetDifference-revert-isMicroMips-special-case.patch => 0014-Mips-MC-AttemptToFoldSymbolOffsetDifference-revert-i.patch (99%) create mode 100644 0015-Clang-LoongArch-Support-the-builtin-functions-for-LS.patch create mode 100644 0015-LoongArch-MC-Refine-MCInstrAnalysis-based-on-registe.patch delete mode 100644 0015-LoongArch-Precommit-test-case-to-show-bug-in-LoongAr.patch create mode 100644 0015-LoongArch-clang-Modify-loongarch-msimd.c-to-avoid-gr.patch create mode 100644 0016-Clang-LoongArch-Support-the-builtin-functions-for-LA.patch rename 0014-Backport-LoongArch-fix-and-add-some-new-support.patch => 0016-LoongArch-CodeGen-Implement-128-bit-and-256-bit-vect.patch (42%) create mode 100644 0016-LoongArch-NFC-Pre-commit-MCInstrAnalysis-tests-for-i.patch delete mode 100644 0016-LoongArch-Pass-OptLevel-to-LoongArchDAGToDAGISel-cor.patch create mode 100644 0017-LoongArch-CodeGen-Add-LSX-builtin-testcases.patch create mode 100644 0017-LoongArch-Enable-128-bits-vector-by-default-100056.patch delete mode 100644 0017-LoongArch-Fix-test-cases-after-2dd8460d8a36.patch create mode 100644 0017-LoongArch-Set-isBarrier-to-true-for-instruction-b-72.patch create mode 100644 0018-LoongArch-Add-LoongArch-V1.1-instructions-definition.patch create mode 100644 0018-LoongArch-CodeGen-Add-LASX-builtin-testcases.patch create mode 100644 0018-LoongArch-MC-Pre-commit-tests-for-instr-bl-fixupkind.patch create mode 100644 0019-LoongArch-Add-codegen-support-for-extractelement-737.patch create mode 100644 0019-LoongArch-Add-definitions-and-feature-frecipe-for-FP.patch create mode 100644 0019-LoongArch-MC-Support-to-get-the-FixupKind-for-BL-729.patch create mode 100644 0020-LoongArch-Add-some-binary-IR-instructions-testcases-.patch create mode 100644 0020-LoongArch-MC-Modify-branch-evaluation-for-MCInstrAna.patch create mode 100644 0020-LoongArch-Support-march-la64v1.0-and-march-la64v1.1-.patch create mode 100644 0021-LoongArch-Add-codegen-support-for-insertelement.patch create mode 100644 0021-LoongArch-Precommit-a-test-for-smul-with-overflow-NF.patch create mode 100644 0021-LoongArch-Support-la664-100068.patch create mode 100644 0022-LoongArch-Custom-lowering-ISD-BUILD_VECTOR.patch create mode 100644 0022-LoongArch-Disable-mulodi4-and-muloti4-libcalls-73199.patch create mode 100644 0022-LoongArch-Fix-test-issue-of-init-loongarch.c.patch create mode 100644 0023-LoongArch-Add-more-and-or-xor-patterns-for-vector-ty.patch create mode 100644 0023-LoongArch-Fix-pattern-for-FNMSUB_-S-D-instructions-7.patch create mode 100644 0023-LoongArch-Remove-experimental-auto-vec-feature.-1000.patch create mode 100644 0024-LoongArch-Add-some-binary-IR-instructions-testcases-.patch create mode 100644 0024-LoongArch-Fix-the-procossor-series-mask.patch create mode 100644 0025-LoongArch-Make-sure-that-the-LoongArchISD-BSTRINS-no.patch create mode 100644 0025-LoongArch-Override-TargetLowering-isShuffleMaskLegal.patch create mode 100644 0026-Clang-LoongArch-Precommit-test-for-fix-wrong-return-.patch create mode 100644 0026-Reland-LoongArch-Support-CTLZ-with-lsx-lasx.patch create mode 100644 0027-Clang-LoongArch-Fix-wrong-return-value-type-of-__ioc.patch create mode 100644 0027-LoongArch-Support-MULHS-MULHU-with-lsx-lasx.patch create mode 100644 0028-LoongArch-Make-ISD-VSELECT-a-legal-operation-with-ls.patch create mode 100644 
0029-LoongArch-Add-codegen-support-for-icmp-fcmp-with-lsx.patch create mode 100644 0030-LoongArch-Make-ISD-FSQRT-a-legal-operation-with-lsx-.patch create mode 100644 0031-LoongArch-Mark-ISD-FNEG-as-legal.patch create mode 100644 0032-LoongArch-Add-codegen-support-for-X-VF-MSUB-NMADD-NM.patch create mode 100644 0033-LoongArch-Fix-LASX-vector_extract-codegen.patch create mode 100644 0034-LoongArch-Fix-incorrect-pattern-XVREPL128VEI_-W-D-in.patch create mode 100644 0035-LoongArch-Fix-incorrect-pattern-X-VBITSELI_B-instruc.patch create mode 100644 0036-Clang-LoongArch-Do-not-pass-vector-arguments-via-vec.patch create mode 100644 0037-LoongArch-Permit-auto-vectorization-using-LSX-LASX-w.patch create mode 100644 0038-CodeGen-LoongArch-Set-SINT_TO_FP-UINT_TO_FP-to-legal.patch create mode 100644 0039-CodeGen-LoongArch-Set-FP_TO_SINT-FP_TO_UINT-to-legal.patch create mode 100644 0040-LoongArch-Fixing-the-incorrect-return-value-of-Loong.patch create mode 100644 0041-LoongArch-Pre-commit-test-for-76913.-NFC.patch create mode 100644 0042-LoongArch-Implement-LoongArchRegisterInfo-canRealign.patch delete mode 100644 CVE-2023-46049.patch delete mode 100644 deprecated-recommonmark.patch create mode 100644 lit.lld-test.cfg.py create mode 100644 run-lit-tests diff --git a/0001-18-Always-build-shared-libs-for-LLD.patch b/0001-18-Always-build-shared-libs-for-LLD.patch new file mode 100644 index 0000000..1659800 --- /dev/null +++ b/0001-18-Always-build-shared-libs-for-LLD.patch @@ -0,0 +1,29 @@ +From b1c60d7fa322a2d208556087df9e7ef94bfbffb8 Mon Sep 17 00:00:00 2001 +From: Nikita Popov +Date: Wed, 8 May 2024 12:30:36 +0900 +Subject: [PATCH] Always build shared libs for LLD + +We don't want to enable BUILD_SHARED_LIBS for the whole build, +but we do want to build lld libraries. +--- + lld/cmake/modules/AddLLD.cmake | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/lld/cmake/modules/AddLLD.cmake b/lld/cmake/modules/AddLLD.cmake +index 2ee066b41535..270c03f096ac 100644 +--- a/lld/cmake/modules/AddLLD.cmake ++++ b/lld/cmake/modules/AddLLD.cmake +@@ -7,9 +7,8 @@ macro(add_lld_library name) + "" + "" + ${ARGN}) +- if(ARG_SHARED) +- set(ARG_ENABLE_SHARED SHARED) +- endif() ++ # Always build shared libs for LLD. ++ set(ARG_ENABLE_SHARED SHARED) + llvm_add_library(${name} ${ARG_ENABLE_SHARED} ${ARG_UNPARSED_ARGUMENTS}) + set_target_properties(${name} PROPERTIES FOLDER "lld libraries") + +-- +2.44.0 \ No newline at end of file diff --git a/0001-Clang-LoongArch-Use-the-ClangBuiltin-class-to-automa.patch b/0001-Clang-LoongArch-Use-the-ClangBuiltin-class-to-automa.patch new file mode 100644 index 0000000..21e2b9f --- /dev/null +++ b/0001-Clang-LoongArch-Use-the-ClangBuiltin-class-to-automa.patch @@ -0,0 +1,670 @@ +From e5c03f299c1761eec0ae325d995eab121f1dd3a3 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Wed, 9 Aug 2023 16:01:37 +0800 +Subject: [PATCH 01/42] [Clang][LoongArch] Use the ClangBuiltin class to + automatically generate support for CBE and CFE + +Fixed the type modifier (L->W), removed redundant feature checking code +since the feature has already been checked in `EmitBuiltinExpr`. And +Cleaned up unused diagnostic information. 
+ +Reviewed By: SixWeining + +Differential Revision: https://reviews.llvm.org/D156866 + +(cherry picked from commit ea8d3b1f9f2d7385d97fcd34d14db0eb2cb2795c) + +--- + .../include/clang/Basic/BuiltinsLoongArch.def | 25 ++-- + .../clang/Basic/DiagnosticSemaKinds.td | 7 - + clang/lib/CodeGen/CGBuiltin.cpp | 130 ---------------- + clang/lib/CodeGen/CodeGenFunction.h | 1 - + clang/lib/Sema/SemaChecking.cpp | 50 +------ + .../CodeGen/LoongArch/intrinsic-la32-error.c | 118 +++++++-------- + llvm/include/llvm/IR/IntrinsicsLoongArch.td | 141 +++++++++--------- + llvm/lib/IR/Function.cpp | 1 + + 8 files changed, 136 insertions(+), 337 deletions(-) + +diff --git a/clang/include/clang/Basic/BuiltinsLoongArch.def b/clang/include/clang/Basic/BuiltinsLoongArch.def +index 7f2c8403410d..20510e18fe58 100644 +--- a/clang/include/clang/Basic/BuiltinsLoongArch.def ++++ b/clang/include/clang/Basic/BuiltinsLoongArch.def +@@ -16,8 +16,7 @@ + #endif + + // TODO: Support more builtins. +-// TODO: Added feature constraints. +-TARGET_BUILTIN(__builtin_loongarch_cacop_d, "vLiULiLi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_cacop_d, "vWiUWiWi", "nc", "64bit") + TARGET_BUILTIN(__builtin_loongarch_cacop_w, "viUii", "nc", "32bit") + TARGET_BUILTIN(__builtin_loongarch_dbar, "vIUi", "nc", "") + TARGET_BUILTIN(__builtin_loongarch_ibar, "vIUi", "nc", "") +@@ -26,36 +25,36 @@ TARGET_BUILTIN(__builtin_loongarch_movgr2fcsr, "vIUiUi", "nc", "f") + TARGET_BUILTIN(__builtin_loongarch_break, "vIUi", "nc", "") + TARGET_BUILTIN(__builtin_loongarch_syscall, "vIUi", "nc", "") + TARGET_BUILTIN(__builtin_loongarch_cpucfg, "UiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_asrtle_d, "vLiLi", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_asrtgt_d, "vLiLi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_asrtle_d, "vWiWi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_asrtgt_d, "vWiWi", "nc", "64bit") + + TARGET_BUILTIN(__builtin_loongarch_crc_w_b_w, "iii", "nc", "64bit") + TARGET_BUILTIN(__builtin_loongarch_crc_w_h_w, "iii", "nc", "64bit") + TARGET_BUILTIN(__builtin_loongarch_crc_w_w_w, "iii", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_crc_w_d_w, "iLii", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_crc_w_d_w, "iWii", "nc", "64bit") + TARGET_BUILTIN(__builtin_loongarch_crcc_w_b_w, "iii", "nc", "64bit") + TARGET_BUILTIN(__builtin_loongarch_crcc_w_h_w, "iii", "nc", "64bit") + TARGET_BUILTIN(__builtin_loongarch_crcc_w_w_w, "iii", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_crcc_w_d_w, "iLii", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_crcc_w_d_w, "iWii", "nc", "64bit") + + TARGET_BUILTIN(__builtin_loongarch_csrrd_w, "UiIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_csrrd_d, "ULiIUi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_csrrd_d, "UWiIUi", "nc", "64bit") + TARGET_BUILTIN(__builtin_loongarch_csrwr_w, "UiUiIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_csrwr_d, "ULiULiIUi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_csrwr_d, "UWiUWiIUi", "nc", "64bit") + TARGET_BUILTIN(__builtin_loongarch_csrxchg_w, "UiUiUiIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_csrxchg_d, "ULiULiULiIUi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_csrxchg_d, "UWiUWiUWiIUi", "nc", "64bit") + + TARGET_BUILTIN(__builtin_loongarch_iocsrrd_b, "UiUi", "nc", "") + TARGET_BUILTIN(__builtin_loongarch_iocsrrd_h, "UiUi", "nc", "") + TARGET_BUILTIN(__builtin_loongarch_iocsrrd_w, "UiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_iocsrrd_d, "ULiUi", "nc", "64bit") 
++TARGET_BUILTIN(__builtin_loongarch_iocsrrd_d, "UWiUi", "nc", "64bit") + TARGET_BUILTIN(__builtin_loongarch_iocsrwr_b, "vUiUi", "nc", "") + TARGET_BUILTIN(__builtin_loongarch_iocsrwr_h, "vUiUi", "nc", "") + TARGET_BUILTIN(__builtin_loongarch_iocsrwr_w, "vUiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_iocsrwr_d, "vULiUi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_iocsrwr_d, "vUWiUi", "nc", "64bit") + +-TARGET_BUILTIN(__builtin_loongarch_lddir_d, "LiLiIULi", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_ldpte_d, "vLiIULi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_lddir_d, "WiWiIUWi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_ldpte_d, "vWiIUWi", "nc", "64bit") + + #undef BUILTIN + #undef TARGET_BUILTIN +diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td +index c88f25209fc0..0e97620945af 100644 +--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td ++++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td +@@ -11853,10 +11853,6 @@ def err_non_designated_init_used : Error< + def err_cast_from_randomized_struct : Error< + "casting from randomized structure pointer type %0 to %1">; + +-// LoongArch-specific Diagnostics +-def err_loongarch_builtin_requires_la64 : Error< +- "this builtin requires target: loongarch64">; +- + // Unsafe buffer usage diagnostics. + def warn_unsafe_buffer_variable : Warning< + "%0 is an %select{unsafe pointer used for buffer access|unsafe buffer that " +@@ -11872,9 +11868,6 @@ def note_unsafe_buffer_variable_fixit_group : Note< + "change type of %0 to '%select{std::span|std::array|std::span::iterator}1' to preserve bounds information%select{|, and change %2 to '%select{std::span|std::array|std::span::iterator}1' to propagate bounds information between them}3">; + def note_safe_buffer_usage_suggestions_disabled : Note< + "pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions">; +-def err_loongarch_builtin_requires_la32 : Error< +- "this builtin requires target: loongarch32">; +- + def err_builtin_pass_in_regs_non_class : Error< + "argument %0 is not an unqualified class type">; + +diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp +index 30f5f4e7061c..e512762fafaf 100644 +--- a/clang/lib/CodeGen/CGBuiltin.cpp ++++ b/clang/lib/CodeGen/CGBuiltin.cpp +@@ -43,7 +43,6 @@ + #include "llvm/IR/IntrinsicsARM.h" + #include "llvm/IR/IntrinsicsBPF.h" + #include "llvm/IR/IntrinsicsHexagon.h" +-#include "llvm/IR/IntrinsicsLoongArch.h" + #include "llvm/IR/IntrinsicsNVPTX.h" + #include "llvm/IR/IntrinsicsPowerPC.h" + #include "llvm/IR/IntrinsicsR600.h" +@@ -5588,9 +5587,6 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, + case llvm::Triple::riscv32: + case llvm::Triple::riscv64: + return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue); +- case llvm::Triple::loongarch32: +- case llvm::Triple::loongarch64: +- return CGF->EmitLoongArchBuiltinExpr(BuiltinID, E); + default: + return nullptr; + } +@@ -20418,129 +20414,3 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, + llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes); + return Builder.CreateCall(F, Ops, ""); + } +- +-Value *CodeGenFunction::EmitLoongArchBuiltinExpr(unsigned BuiltinID, +- const CallExpr *E) { +- SmallVector Ops; +- +- for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) +- Ops.push_back(EmitScalarExpr(E->getArg(i))); +- +- Intrinsic::ID ID = Intrinsic::not_intrinsic; +- +- switch (BuiltinID) { +- default: +- 
llvm_unreachable("unexpected builtin ID."); +- case LoongArch::BI__builtin_loongarch_cacop_d: +- ID = Intrinsic::loongarch_cacop_d; +- break; +- case LoongArch::BI__builtin_loongarch_cacop_w: +- ID = Intrinsic::loongarch_cacop_w; +- break; +- case LoongArch::BI__builtin_loongarch_dbar: +- ID = Intrinsic::loongarch_dbar; +- break; +- case LoongArch::BI__builtin_loongarch_break: +- ID = Intrinsic::loongarch_break; +- break; +- case LoongArch::BI__builtin_loongarch_ibar: +- ID = Intrinsic::loongarch_ibar; +- break; +- case LoongArch::BI__builtin_loongarch_movfcsr2gr: +- ID = Intrinsic::loongarch_movfcsr2gr; +- break; +- case LoongArch::BI__builtin_loongarch_movgr2fcsr: +- ID = Intrinsic::loongarch_movgr2fcsr; +- break; +- case LoongArch::BI__builtin_loongarch_syscall: +- ID = Intrinsic::loongarch_syscall; +- break; +- case LoongArch::BI__builtin_loongarch_crc_w_b_w: +- ID = Intrinsic::loongarch_crc_w_b_w; +- break; +- case LoongArch::BI__builtin_loongarch_crc_w_h_w: +- ID = Intrinsic::loongarch_crc_w_h_w; +- break; +- case LoongArch::BI__builtin_loongarch_crc_w_w_w: +- ID = Intrinsic::loongarch_crc_w_w_w; +- break; +- case LoongArch::BI__builtin_loongarch_crc_w_d_w: +- ID = Intrinsic::loongarch_crc_w_d_w; +- break; +- case LoongArch::BI__builtin_loongarch_crcc_w_b_w: +- ID = Intrinsic::loongarch_crcc_w_b_w; +- break; +- case LoongArch::BI__builtin_loongarch_crcc_w_h_w: +- ID = Intrinsic::loongarch_crcc_w_h_w; +- break; +- case LoongArch::BI__builtin_loongarch_crcc_w_w_w: +- ID = Intrinsic::loongarch_crcc_w_w_w; +- break; +- case LoongArch::BI__builtin_loongarch_crcc_w_d_w: +- ID = Intrinsic::loongarch_crcc_w_d_w; +- break; +- case LoongArch::BI__builtin_loongarch_csrrd_w: +- ID = Intrinsic::loongarch_csrrd_w; +- break; +- case LoongArch::BI__builtin_loongarch_csrwr_w: +- ID = Intrinsic::loongarch_csrwr_w; +- break; +- case LoongArch::BI__builtin_loongarch_csrxchg_w: +- ID = Intrinsic::loongarch_csrxchg_w; +- break; +- case LoongArch::BI__builtin_loongarch_csrrd_d: +- ID = Intrinsic::loongarch_csrrd_d; +- break; +- case LoongArch::BI__builtin_loongarch_csrwr_d: +- ID = Intrinsic::loongarch_csrwr_d; +- break; +- case LoongArch::BI__builtin_loongarch_csrxchg_d: +- ID = Intrinsic::loongarch_csrxchg_d; +- break; +- case LoongArch::BI__builtin_loongarch_iocsrrd_b: +- ID = Intrinsic::loongarch_iocsrrd_b; +- break; +- case LoongArch::BI__builtin_loongarch_iocsrrd_h: +- ID = Intrinsic::loongarch_iocsrrd_h; +- break; +- case LoongArch::BI__builtin_loongarch_iocsrrd_w: +- ID = Intrinsic::loongarch_iocsrrd_w; +- break; +- case LoongArch::BI__builtin_loongarch_iocsrrd_d: +- ID = Intrinsic::loongarch_iocsrrd_d; +- break; +- case LoongArch::BI__builtin_loongarch_iocsrwr_b: +- ID = Intrinsic::loongarch_iocsrwr_b; +- break; +- case LoongArch::BI__builtin_loongarch_iocsrwr_h: +- ID = Intrinsic::loongarch_iocsrwr_h; +- break; +- case LoongArch::BI__builtin_loongarch_iocsrwr_w: +- ID = Intrinsic::loongarch_iocsrwr_w; +- break; +- case LoongArch::BI__builtin_loongarch_iocsrwr_d: +- ID = Intrinsic::loongarch_iocsrwr_d; +- break; +- case LoongArch::BI__builtin_loongarch_cpucfg: +- ID = Intrinsic::loongarch_cpucfg; +- break; +- case LoongArch::BI__builtin_loongarch_asrtle_d: +- ID = Intrinsic::loongarch_asrtle_d; +- break; +- case LoongArch::BI__builtin_loongarch_asrtgt_d: +- ID = Intrinsic::loongarch_asrtgt_d; +- break; +- case LoongArch::BI__builtin_loongarch_lddir_d: +- ID = Intrinsic::loongarch_lddir_d; +- break; +- case LoongArch::BI__builtin_loongarch_ldpte_d: +- ID = Intrinsic::loongarch_ldpte_d; +- break; +- 
// TODO: Support more Intrinsics. +- } +- +- assert(ID != Intrinsic::not_intrinsic); +- +- llvm::Function *F = CGM.getIntrinsic(ID); +- return Builder.CreateCall(F, Ops); +-} +diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h +index 8722fd4550e4..143e0707b942 100644 +--- a/clang/lib/CodeGen/CodeGenFunction.h ++++ b/clang/lib/CodeGen/CodeGenFunction.h +@@ -4316,7 +4316,6 @@ public: + llvm::Value *EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E); + llvm::Value *EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E, + ReturnValueSlot ReturnValue); +- llvm::Value *EmitLoongArchBuiltinExpr(unsigned BuiltinID, const CallExpr *E); + void ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope, + llvm::AtomicOrdering &AO, + llvm::SyncScope::ID &SSID); +diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp +index a94f009f3fa6..a8416bf4de92 100644 +--- a/clang/lib/Sema/SemaChecking.cpp ++++ b/clang/lib/Sema/SemaChecking.cpp +@@ -3827,39 +3827,12 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, + default: + break; + case LoongArch::BI__builtin_loongarch_cacop_d: +- if (!TI.hasFeature("64bit")) +- return Diag(TheCall->getBeginLoc(), +- diag::err_loongarch_builtin_requires_la64) +- << TheCall->getSourceRange(); +- [[fallthrough]]; + case LoongArch::BI__builtin_loongarch_cacop_w: { +- if (BuiltinID == LoongArch::BI__builtin_loongarch_cacop_w && +- !TI.hasFeature("32bit")) +- return Diag(TheCall->getBeginLoc(), +- diag::err_loongarch_builtin_requires_la32) +- << TheCall->getSourceRange(); + SemaBuiltinConstantArgRange(TheCall, 0, 0, llvm::maxUIntN(5)); + SemaBuiltinConstantArgRange(TheCall, 2, llvm::minIntN(12), + llvm::maxIntN(12)); + break; + } +- case LoongArch::BI__builtin_loongarch_crc_w_b_w: +- case LoongArch::BI__builtin_loongarch_crc_w_h_w: +- case LoongArch::BI__builtin_loongarch_crc_w_w_w: +- case LoongArch::BI__builtin_loongarch_crc_w_d_w: +- case LoongArch::BI__builtin_loongarch_crcc_w_b_w: +- case LoongArch::BI__builtin_loongarch_crcc_w_h_w: +- case LoongArch::BI__builtin_loongarch_crcc_w_w_w: +- case LoongArch::BI__builtin_loongarch_crcc_w_d_w: +- case LoongArch::BI__builtin_loongarch_iocsrrd_d: +- case LoongArch::BI__builtin_loongarch_iocsrwr_d: +- case LoongArch::BI__builtin_loongarch_asrtle_d: +- case LoongArch::BI__builtin_loongarch_asrtgt_d: +- if (!TI.hasFeature("64bit")) +- return Diag(TheCall->getBeginLoc(), +- diag::err_loongarch_builtin_requires_la64) +- << TheCall->getSourceRange(); +- break; + case LoongArch::BI__builtin_loongarch_break: + case LoongArch::BI__builtin_loongarch_dbar: + case LoongArch::BI__builtin_loongarch_ibar: +@@ -3867,35 +3840,16 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, + // Check if immediate is in [0, 32767]. 
+ return SemaBuiltinConstantArgRange(TheCall, 0, 0, 32767); + case LoongArch::BI__builtin_loongarch_csrrd_w: +- return SemaBuiltinConstantArgRange(TheCall, 0, 0, 16383); +- case LoongArch::BI__builtin_loongarch_csrwr_w: +- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 16383); +- case LoongArch::BI__builtin_loongarch_csrxchg_w: +- return SemaBuiltinConstantArgRange(TheCall, 2, 0, 16383); + case LoongArch::BI__builtin_loongarch_csrrd_d: +- if (!TI.hasFeature("64bit")) +- return Diag(TheCall->getBeginLoc(), +- diag::err_loongarch_builtin_requires_la64) +- << TheCall->getSourceRange(); + return SemaBuiltinConstantArgRange(TheCall, 0, 0, 16383); ++ case LoongArch::BI__builtin_loongarch_csrwr_w: + case LoongArch::BI__builtin_loongarch_csrwr_d: +- if (!TI.hasFeature("64bit")) +- return Diag(TheCall->getBeginLoc(), +- diag::err_loongarch_builtin_requires_la64) +- << TheCall->getSourceRange(); + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 16383); ++ case LoongArch::BI__builtin_loongarch_csrxchg_w: + case LoongArch::BI__builtin_loongarch_csrxchg_d: +- if (!TI.hasFeature("64bit")) +- return Diag(TheCall->getBeginLoc(), +- diag::err_loongarch_builtin_requires_la64) +- << TheCall->getSourceRange(); + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 16383); + case LoongArch::BI__builtin_loongarch_lddir_d: + case LoongArch::BI__builtin_loongarch_ldpte_d: +- if (!TI.hasFeature("64bit")) +- return Diag(TheCall->getBeginLoc(), +- diag::err_loongarch_builtin_requires_la64) +- << TheCall->getSourceRange(); + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 31); + case LoongArch::BI__builtin_loongarch_movfcsr2gr: + case LoongArch::BI__builtin_loongarch_movgr2fcsr: +diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la32-error.c b/clang/test/CodeGen/LoongArch/intrinsic-la32-error.c +index 0264c2948934..db113a13eb5a 100644 +--- a/clang/test/CodeGen/LoongArch/intrinsic-la32-error.c ++++ b/clang/test/CodeGen/LoongArch/intrinsic-la32-error.c +@@ -1,9 +1,58 @@ + // RUN: %clang_cc1 -triple loongarch32 -emit-llvm -S -verify %s -o /dev/null ++// RUN: not %clang_cc1 -triple loongarch32 -DFEATURE_CHECK -emit-llvm %s 2>&1 \ ++// RUN: | FileCheck %s + + #include + ++#ifdef FEATURE_CHECK ++void test_feature(long *v_l, unsigned long *v_ul, int *v_i, unsigned ui, char c, short s) { ++// CHECK: error: '__builtin_loongarch_cacop_d' needs target feature 64bit ++ __builtin_loongarch_cacop_d(1, v_ul[0], 1024); ++ ++// CHECK: error: '__builtin_loongarch_crc_w_b_w' needs target feature 64bit ++ v_i[0] = __builtin_loongarch_crc_w_b_w(c, v_i[0]); ++// CHECK: error: '__builtin_loongarch_crc_w_h_w' needs target feature 64bit ++ v_i[1] = __builtin_loongarch_crc_w_h_w(c, v_i[0]); ++// CHECK: error: '__builtin_loongarch_crc_w_w_w' needs target feature 64bit ++ v_i[2] = __builtin_loongarch_crc_w_w_w(c, v_i[0]); ++// CHECK: error: '__builtin_loongarch_crc_w_d_w' needs target feature 64bit ++ v_i[3] = __builtin_loongarch_crc_w_d_w(c, v_i[0]); ++ ++// CHECK: error: '__builtin_loongarch_crcc_w_b_w' needs target feature 64bit ++ v_i[4] = __builtin_loongarch_crcc_w_b_w(c, v_i[0]); ++// CHECK: error: '__builtin_loongarch_crcc_w_h_w' needs target feature 64bit ++ v_i[5] = __builtin_loongarch_crcc_w_h_w(s, v_i[0]); ++// CHECK: error: '__builtin_loongarch_crcc_w_w_w' needs target feature 64bit ++ v_i[6] = __builtin_loongarch_crcc_w_w_w(v_i[0], v_i[1]); ++// CHECK: error: '__builtin_loongarch_crcc_w_d_w' needs target feature 64bit ++ v_i[7] = __builtin_loongarch_crcc_w_d_w(v_l[0], v_i[0]); ++ ++// CHECK: error: 
'__builtin_loongarch_csrrd_d' needs target feature 64bit ++ v_ul[0] = __builtin_loongarch_csrrd_d(1); ++// CHECK: error: '__builtin_loongarch_csrwr_d' needs target feature 64bit ++ v_ul[1] = __builtin_loongarch_csrwr_d(v_ul[0], 1); ++// CHECK: error: '__builtin_loongarch_csrxchg_d' needs target feature 64bit ++ v_ul[2] = __builtin_loongarch_csrxchg_d(v_ul[0], v_ul[1], 1); ++ ++ ++// CHECK: error: '__builtin_loongarch_iocsrrd_d' needs target feature 64bit ++ v_ul[3] = __builtin_loongarch_iocsrrd_d(ui); ++// CHECK: error: '__builtin_loongarch_iocsrwr_d' needs target feature 64bit ++ __builtin_loongarch_iocsrwr_d(v_ul[0], ui); ++ ++// CHECK: error: '__builtin_loongarch_asrtle_d' needs target feature 64bit ++ __builtin_loongarch_asrtle_d(v_l[0], v_l[1]); ++// CHECK: error: '__builtin_loongarch_asrtgt_d' needs target feature 64bit ++ __builtin_loongarch_asrtgt_d(v_l[0], v_l[1]); ++ ++// CHECK: error: '__builtin_loongarch_lddir_d' needs target feature 64bit ++ v_ul[4] = __builtin_loongarch_lddir_d(v_l[0], 1); ++// CHECK: error: '__builtin_loongarch_ldpte_d' needs target feature 64bit ++ __builtin_loongarch_ldpte_d(v_l[0], 1); ++} ++#endif ++ + void cacop_d(unsigned long int a) { +- __builtin_loongarch_cacop_d(1, a, 1024); // expected-error {{this builtin requires target: loongarch64}} + __builtin_loongarch_cacop_w(-1, a, 1024); // expected-error {{argument value -1 is outside the valid range [0, 31]}} + __builtin_loongarch_cacop_w(32, a, 1024); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + __builtin_loongarch_cacop_w(1, a, -4096); // expected-error {{argument value -4096 is outside the valid range [-2048, 2047]}} +@@ -47,49 +96,6 @@ void syscall(int a) { + __builtin_loongarch_syscall(a); // expected-error {{argument to '__builtin_loongarch_syscall' must be a constant integer}} + } + +-int crc_w_b_w(char a, int b) { +- return __builtin_loongarch_crc_w_b_w(a, b); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-int crc_w_h_w(short a, int b) { +- return __builtin_loongarch_crc_w_h_w(a, b); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-int crc_w_w_w(int a, int b) { +- return __builtin_loongarch_crc_w_w_w(a, b); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-int crc_w_d_w(long int a, int b) { +- return __builtin_loongarch_crc_w_d_w(a, b); // expected-error {{this builtin requires target: loongarch64}} +-} +-int crcc_w_b_w(char a, int b) { +- return __builtin_loongarch_crcc_w_b_w(a, b); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-int crcc_w_h_w(short a, int b) { +- return __builtin_loongarch_crcc_w_h_w(a, b); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-int crcc_w_w_w(int a, int b) { +- return __builtin_loongarch_crcc_w_w_w(a, b); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-int crcc_w_d_w(long int a, int b) { +- return __builtin_loongarch_crcc_w_d_w(a, b); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-unsigned long int csrrd_d() { +- return __builtin_loongarch_csrrd_d(1); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-unsigned long int csrwr_d(unsigned long int a) { +- return __builtin_loongarch_csrwr_d(a, 1); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-unsigned long int csrxchg_d(unsigned long int a, unsigned long int b) { +- return __builtin_loongarch_csrxchg_d(a, b, 1); // expected-error {{this builtin requires target: 
loongarch64}} +-} +- + void csrrd_w(int a) { + __builtin_loongarch_csrrd_w(16384); // expected-error {{argument value 16384 is outside the valid range [0, 16383]}} + __builtin_loongarch_csrrd_w(-1); // expected-error {{argument value 4294967295 is outside the valid range [0, 16383]}} +@@ -108,30 +114,6 @@ void csrxchg_w(unsigned int a, unsigned int b) { + __builtin_loongarch_csrxchg_w(a, b, b); // expected-error {{argument to '__builtin_loongarch_csrxchg_w' must be a constant integer}} + } + +-unsigned long int iocsrrd_d(unsigned int a) { +- return __builtin_loongarch_iocsrrd_d(a); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-void iocsrwr_d(unsigned long int a, unsigned int b) { +- __builtin_loongarch_iocsrwr_d(a, b); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-void asrtle_d(long int a, long int b) { +- __builtin_loongarch_asrtle_d(a, b); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-void asrtgt_d(long int a, long int b) { +- __builtin_loongarch_asrtgt_d(a, b); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-void lddir_d(long int a, int b) { +- __builtin_loongarch_lddir_d(a, 1); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-void ldpte_d(long int a, int b) { +- __builtin_loongarch_ldpte_d(a, 1); // expected-error {{this builtin requires target: loongarch64}} +-} +- + void rdtime_d() { + __rdtime_d(); // expected-error {{call to undeclared function '__rdtime_d'}} + } +diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td +index 5edce3c529e1..4219b2f55346 100644 +--- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td ++++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td +@@ -51,74 +51,75 @@ defm int_loongarch_masked_cmpxchg : MaskedAtomicRMWFiveOpIntrinsics; + //===----------------------------------------------------------------------===// + // LoongArch BASE + +-def int_loongarch_break : Intrinsic<[], [llvm_i32_ty], [ImmArg>]>; +-def int_loongarch_cacop_d : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], +- [ImmArg>, ImmArg>]>; +-def int_loongarch_cacop_w : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], +- [ImmArg>, ImmArg>]>; +-def int_loongarch_dbar : Intrinsic<[], [llvm_i32_ty], [ImmArg>]>; +-def int_loongarch_ibar : Intrinsic<[], [llvm_i32_ty], [ImmArg>]>; +-def int_loongarch_movfcsr2gr : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], +- [ImmArg>]>; +-def int_loongarch_movgr2fcsr : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], +- [ImmArg>]>; +-def int_loongarch_syscall : Intrinsic<[], [llvm_i32_ty], [ImmArg>]>; +- +-def int_loongarch_crc_w_b_w : Intrinsic<[llvm_i32_ty], +- [llvm_i32_ty, llvm_i32_ty]>; +-def int_loongarch_crc_w_h_w : Intrinsic<[llvm_i32_ty], +- [llvm_i32_ty, llvm_i32_ty]>; +-def int_loongarch_crc_w_w_w : Intrinsic<[llvm_i32_ty], +- [llvm_i32_ty, llvm_i32_ty]>; +-def int_loongarch_crc_w_d_w : Intrinsic<[llvm_i32_ty], +- [llvm_i64_ty, llvm_i32_ty]>; +- +-def int_loongarch_crcc_w_b_w : Intrinsic<[llvm_i32_ty], +- [llvm_i32_ty, llvm_i32_ty]>; +-def int_loongarch_crcc_w_h_w : Intrinsic<[llvm_i32_ty], +- [llvm_i32_ty, llvm_i32_ty]>; +-def int_loongarch_crcc_w_w_w : Intrinsic<[llvm_i32_ty], +- [llvm_i32_ty, llvm_i32_ty]>; +-def int_loongarch_crcc_w_d_w : Intrinsic<[llvm_i32_ty], +- [llvm_i64_ty, llvm_i32_ty]>; +- +-def int_loongarch_csrrd_w : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], +- [ImmArg>]>; +-def int_loongarch_csrrd_d : Intrinsic<[llvm_i64_ty], [llvm_i32_ty], +- [ImmArg>]>; +-def 
int_loongarch_csrwr_w : Intrinsic<[llvm_i32_ty], +- [llvm_i32_ty, llvm_i32_ty], +- [ImmArg>]>; +-def int_loongarch_csrwr_d : Intrinsic<[llvm_i64_ty], +- [llvm_i64_ty, llvm_i32_ty], +- [ImmArg>]>; +-def int_loongarch_csrxchg_w : Intrinsic<[llvm_i32_ty], +- [llvm_i32_ty, llvm_i32_ty, +- llvm_i32_ty], +- [ImmArg>]>; +-def int_loongarch_csrxchg_d : Intrinsic<[llvm_i64_ty], +- [llvm_i64_ty, llvm_i64_ty, +- llvm_i32_ty], +- [ImmArg>]>; +- +-def int_loongarch_iocsrrd_b : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>; +-def int_loongarch_iocsrrd_h : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>; +-def int_loongarch_iocsrrd_w : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>; +-def int_loongarch_iocsrrd_d : Intrinsic<[llvm_i64_ty], [llvm_i32_ty]>; +- +-def int_loongarch_iocsrwr_b : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty]>; +-def int_loongarch_iocsrwr_h : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty]>; +-def int_loongarch_iocsrwr_w : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty]>; +-def int_loongarch_iocsrwr_d : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty]>; +- +-def int_loongarch_cpucfg : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>; +- +-def int_loongarch_asrtle_d : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty]>; +-def int_loongarch_asrtgt_d : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty]>; +- +-def int_loongarch_lddir_d : Intrinsic<[llvm_i64_ty], +- [llvm_i64_ty, llvm_i64_ty], +- [ImmArg>]>; +-def int_loongarch_ldpte_d : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty], +- [ImmArg>]>; ++class BaseInt ret_types, list param_types, ++ list intr_properties = []> ++ : Intrinsic, ++ ClangBuiltin; ++ ++def int_loongarch_break : BaseInt<[], [llvm_i32_ty], [ImmArg>]>; ++def int_loongarch_cacop_d : BaseInt<[], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], ++ [ImmArg>, ImmArg>]>; ++def int_loongarch_cacop_w : BaseInt<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], ++ [ImmArg>, ImmArg>]>; ++def int_loongarch_dbar : BaseInt<[], [llvm_i32_ty], [ImmArg>]>; ++ ++def int_loongarch_ibar : BaseInt<[], [llvm_i32_ty], [ImmArg>]>; ++def int_loongarch_movfcsr2gr : BaseInt<[llvm_i32_ty], [llvm_i32_ty], ++ [ImmArg>]>; ++def int_loongarch_movgr2fcsr : BaseInt<[], [llvm_i32_ty, llvm_i32_ty], ++ [ImmArg>]>; ++def int_loongarch_syscall : BaseInt<[], [llvm_i32_ty], [ImmArg>]>; ++ ++def int_loongarch_crc_w_b_w : BaseInt<[llvm_i32_ty], ++ [llvm_i32_ty, llvm_i32_ty]>; ++def int_loongarch_crc_w_h_w : BaseInt<[llvm_i32_ty], ++ [llvm_i32_ty, llvm_i32_ty]>; ++def int_loongarch_crc_w_w_w : BaseInt<[llvm_i32_ty], ++ [llvm_i32_ty, llvm_i32_ty]>; ++def int_loongarch_crc_w_d_w : BaseInt<[llvm_i32_ty], ++ [llvm_i64_ty, llvm_i32_ty]>; ++ ++def int_loongarch_crcc_w_b_w : BaseInt<[llvm_i32_ty], ++ [llvm_i32_ty, llvm_i32_ty]>; ++def int_loongarch_crcc_w_h_w : BaseInt<[llvm_i32_ty], ++ [llvm_i32_ty, llvm_i32_ty]>; ++def int_loongarch_crcc_w_w_w : BaseInt<[llvm_i32_ty], ++ [llvm_i32_ty, llvm_i32_ty]>; ++def int_loongarch_crcc_w_d_w : BaseInt<[llvm_i32_ty], ++ [llvm_i64_ty, llvm_i32_ty]>; ++ ++def int_loongarch_csrrd_w : BaseInt<[llvm_i32_ty], [llvm_i32_ty], ++ [ImmArg>]>; ++def int_loongarch_csrrd_d : BaseInt<[llvm_i64_ty], [llvm_i32_ty], ++ [ImmArg>]>; ++def int_loongarch_csrwr_w : BaseInt<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], ++ [ImmArg>]>; ++def int_loongarch_csrwr_d : BaseInt<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty], ++ [ImmArg>]>; ++def int_loongarch_csrxchg_w : BaseInt<[llvm_i32_ty], ++ [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], ++ [ImmArg>]>; ++def int_loongarch_csrxchg_d : BaseInt<[llvm_i64_ty], ++ [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], ++ [ImmArg>]>; ++ ++def int_loongarch_iocsrrd_b : 
BaseInt<[llvm_i32_ty], [llvm_i32_ty]>; ++def int_loongarch_iocsrrd_h : BaseInt<[llvm_i32_ty], [llvm_i32_ty]>; ++def int_loongarch_iocsrrd_w : BaseInt<[llvm_i32_ty], [llvm_i32_ty]>; ++def int_loongarch_iocsrrd_d : BaseInt<[llvm_i64_ty], [llvm_i32_ty]>; ++ ++def int_loongarch_iocsrwr_b : BaseInt<[], [llvm_i32_ty, llvm_i32_ty]>; ++def int_loongarch_iocsrwr_h : BaseInt<[], [llvm_i32_ty, llvm_i32_ty]>; ++def int_loongarch_iocsrwr_w : BaseInt<[], [llvm_i32_ty, llvm_i32_ty]>; ++def int_loongarch_iocsrwr_d : BaseInt<[], [llvm_i64_ty, llvm_i32_ty]>; ++ ++def int_loongarch_cpucfg : BaseInt<[llvm_i32_ty], [llvm_i32_ty]>; ++ ++def int_loongarch_asrtle_d : BaseInt<[], [llvm_i64_ty, llvm_i64_ty]>; ++def int_loongarch_asrtgt_d : BaseInt<[], [llvm_i64_ty, llvm_i64_ty]>; ++ ++def int_loongarch_lddir_d : BaseInt<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], ++ [ImmArg>]>; ++def int_loongarch_ldpte_d : BaseInt<[], [llvm_i64_ty, llvm_i64_ty], ++ [ImmArg>]>; + } // TargetPrefix = "loongarch" +diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp +index 27219e89dc5f..435800d9e5f9 100644 +--- a/llvm/lib/IR/Function.cpp ++++ b/llvm/lib/IR/Function.cpp +@@ -37,6 +37,7 @@ + #include "llvm/IR/IntrinsicsBPF.h" + #include "llvm/IR/IntrinsicsDirectX.h" + #include "llvm/IR/IntrinsicsHexagon.h" ++#include "llvm/IR/IntrinsicsLoongArch.h" + #include "llvm/IR/IntrinsicsMips.h" + #include "llvm/IR/IntrinsicsNVPTX.h" + #include "llvm/IR/IntrinsicsPowerPC.h" +-- +2.20.1 + diff --git a/0001-Clear-instructions-not-recorded-in-ErasedInstrs.patch b/0001-Clear-instructions-not-recorded-in-ErasedInstrs.patch deleted file mode 100644 index 54fe687..0000000 --- a/0001-Clear-instructions-not-recorded-in-ErasedInstrs.patch +++ /dev/null @@ -1,287 +0,0 @@ -From 5704a77a9e8330aae5b6fe8d57056e992655624c Mon Sep 17 00:00:00 2001 -From: doupengda -Date: Thu, 5 Sep 2024 19:28:44 +0800 -Subject: [PATCH] Clear instructions not recorded in ErasedInstrs - ---- - llvm/lib/CodeGen/RegisterCoalescer.cpp | 28 ++- - .../register-coalescer-crash-pr79718.mir | 194 ++++++++++++++++++ - 2 files changed, 216 insertions(+), 6 deletions(-) - create mode 100644 test/CodeGen/LoongArch/register-coalescer-crash-pr79718.mir - -diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp -index e49885b6..511a7630 100644 ---- a/llvm/lib/CodeGen/RegisterCoalescer.cpp -+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp -@@ -236,8 +236,8 @@ namespace { - /// was successfully coalesced away. If it is not currently possible to - /// coalesce this interval, but it may be possible if other things get - /// coalesced, then it returns true by reference in 'Again'. -- bool joinCopy(MachineInstr *CopyMI, bool &Again); -- -+ bool joinCopy(MachineInstr *CopyMI, bool &Again, -+ SmallPtrSetImpl &CurrentErasedInstrs); - /// Attempt to join these two intervals. On failure, this - /// returns false. The output "SrcInt" will not have been modified, so we - /// can use this information below to update aliases. 
-@@ -1899,7 +1899,9 @@ void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI, - LIS->shrinkToUses(&LI); - } - --bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { -+bool RegisterCoalescer::joinCopy( -+ MachineInstr *CopyMI, bool &Again, -+ SmallPtrSetImpl &CurrentErasedInstrs) { - Again = false; - LLVM_DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI); - -@@ -2091,7 +2093,9 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { - // CopyMI has been erased by joinIntervals at this point. Remove it from - // ErasedInstrs since copyCoalesceWorkList() won't add a successful join back - // to the work list. This keeps ErasedInstrs from growing needlessly. -- ErasedInstrs.erase(CopyMI); -+ if (ErasedInstrs.erase(CopyMI)) -+ // But we may encounter the instruction again in this iteration. -+ CurrentErasedInstrs.insert(CopyMI); - - // Rewrite all SrcReg operands to DstReg. - // Also update DstReg operands to include DstIdx if it is set. -@@ -3915,21 +3919,33 @@ void RegisterCoalescer::lateLiveIntervalUpdate() { - bool RegisterCoalescer:: - copyCoalesceWorkList(MutableArrayRef CurrList) { - bool Progress = false; -+ SmallPtrSet CurrentErasedInstrs; - for (MachineInstr *&MI : CurrList) { - if (!MI) - continue; - // Skip instruction pointers that have already been erased, for example by - // dead code elimination. -- if (ErasedInstrs.count(MI)) { -+ if (ErasedInstrs.count(MI) || CurrentErasedInstrs.count(MI)) { - MI = nullptr; - continue; - } - bool Again = false; -- bool Success = joinCopy(MI, Again); -+ bool Success = joinCopy(MI, Again, CurrentErasedInstrs); - Progress |= Success; - if (Success || !Again) - MI = nullptr; - } -+ // Clear instructions not recorded in `ErasedInstrs` but erased. 
-+ if (!CurrentErasedInstrs.empty()) { -+ for (MachineInstr *&MI : CurrList) { -+ if (MI && CurrentErasedInstrs.count(MI)) -+ MI = nullptr; -+ } -+ for (MachineInstr *&MI : WorkList) { -+ if (MI && CurrentErasedInstrs.count(MI)) -+ MI = nullptr; -+ } -+ } - return Progress; - } - -diff --git a/llvm/test/CodeGen/LoongArch/register-coalescer-crash-pr79718.mir b/llvmtest/CodeGen/LoongArch/register-coalescer-crash-pr79718.mir -new file mode 100644 -index 00000000..2776d1a5 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/register-coalescer-crash-pr79718.mir -@@ -0,0 +1,194 @@ -+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 -+# RUN: llc -o - %s -mtriple=loongarch64 \ -+# RUN: -run-pass=register-coalescer -join-liveintervals=1 -join-splitedges=0 | FileCheck %s -+ -+--- -+name: foo -+tracksRegLiveness: true -+body: | -+ ; CHECK-LABEL: name: foo -+ ; CHECK: bb.0: -+ ; CHECK-NEXT: successors: %bb.1(0x80000000) -+ ; CHECK-NEXT: liveins: $r4, $r5, $r6, $r7, $r8 -+ ; CHECK-NEXT: {{ $}} -+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r8 -+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $r7 -+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $r6 -+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $r5 -+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY $r4 -+ ; CHECK-NEXT: [[ANDI:%[0-9]+]]:gpr = ANDI [[COPY3]], 1 -+ ; CHECK-NEXT: [[ORI:%[0-9]+]]:gpr = ORI $r0, 1 -+ ; CHECK-NEXT: [[ANDI1:%[0-9]+]]:gpr = ANDI [[COPY2]], 1 -+ ; CHECK-NEXT: [[ANDI2:%[0-9]+]]:gpr = ANDI [[COPY1]], 1 -+ ; CHECK-NEXT: [[ANDI3:%[0-9]+]]:gpr = ANDI [[COPY]], 1 -+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr = COPY $r0 -+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gpr = COPY $r0 -+ ; CHECK-NEXT: {{ $}} -+ ; CHECK-NEXT: bb.1: -+ ; CHECK-NEXT: successors: %bb.2(0x80000000) -+ ; CHECK-NEXT: {{ $}} -+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:gpr = COPY [[COPY5]] -+ ; CHECK-NEXT: {{ $}} -+ ; CHECK-NEXT: bb.2: -+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000) -+ ; CHECK-NEXT: {{ $}} -+ ; CHECK-NEXT: BEQZ [[ANDI]], %bb.4 -+ ; CHECK-NEXT: {{ $}} -+ ; CHECK-NEXT: bb.3: -+ ; CHECK-NEXT: successors: %bb.9(0x80000000) -+ ; CHECK-NEXT: {{ $}} -+ ; CHECK-NEXT: PseudoBR %bb.9 -+ ; CHECK-NEXT: {{ $}} -+ ; CHECK-NEXT: bb.4: -+ ; CHECK-NEXT: successors: %bb.5(0x80000000) -+ ; CHECK-NEXT: {{ $}} -+ ; CHECK-NEXT: {{ $}} -+ ; CHECK-NEXT: bb.5: -+ ; CHECK-NEXT: successors: %bb.7(0x7c000000), %bb.6(0x04000000) -+ ; CHECK-NEXT: {{ $}} -+ ; CHECK-NEXT: dead [[LD_D:%[0-9]+]]:gpr = LD_D $r0, 8 -+ ; CHECK-NEXT: dead [[LD_D1:%[0-9]+]]:gpr = LD_D $r0, 0 -+ ; CHECK-NEXT: BNEZ [[ANDI1]], %bb.7 -+ ; CHECK-NEXT: {{ $}} -+ ; CHECK-NEXT: bb.6: -+ ; CHECK-NEXT: successors: %bb.11(0x80000000) -+ ; CHECK-NEXT: {{ $}} -+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gpr = COPY $r0 -+ ; CHECK-NEXT: PseudoBR %bb.11 -+ ; CHECK-NEXT: {{ $}} -+ ; CHECK-NEXT: bb.7: -+ ; CHECK-NEXT: successors: %bb.8(0x7c000000), %bb.10(0x04000000) -+ ; CHECK-NEXT: {{ $}} -+ ; CHECK-NEXT: BEQZ [[ANDI2]], %bb.10 -+ ; CHECK-NEXT: PseudoBR %bb.8 -+ ; CHECK-NEXT: {{ $}} -+ ; CHECK-NEXT: bb.8: -+ ; CHECK-NEXT: successors: %bb.9(0x04000000), %bb.5(0x7c000000) -+ ; CHECK-NEXT: {{ $}} -+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gpr = ADDI_D [[COPY6]], 1 -+ ; CHECK-NEXT: BEQZ [[ANDI3]], %bb.5 -+ ; CHECK-NEXT: PseudoBR %bb.9 -+ ; CHECK-NEXT: {{ $}} -+ ; CHECK-NEXT: bb.9: -+ ; CHECK-NEXT: successors: %bb.12(0x80000000) -+ ; CHECK-NEXT: {{ $}} -+ ; CHECK-NEXT: ST_B $r0, [[COPY4]], 0 -+ ; CHECK-NEXT: PseudoBR %bb.12 -+ ; CHECK-NEXT: {{ $}} -+ ; CHECK-NEXT: bb.10: -+ ; CHECK-NEXT: successors: %bb.11(0x80000000) 
-+ ; CHECK-NEXT: {{ $}} -+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr = ADDI_D [[COPY6]], 1 -+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gpr = COPY [[ORI]] -+ ; CHECK-NEXT: {{ $}} -+ ; CHECK-NEXT: bb.11: -+ ; CHECK-NEXT: successors: %bb.12(0x80000000) -+ ; CHECK-NEXT: {{ $}} -+ ; CHECK-NEXT: ST_D $r0, [[COPY4]], 0 -+ ; CHECK-NEXT: {{ $}} -+ ; CHECK-NEXT: bb.12: -+ ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.1(0x04000000) -+ ; CHECK-NEXT: {{ $}} -+ ; CHECK-NEXT: BEQ [[COPY7]], [[ORI]], %bb.2 -+ ; CHECK-NEXT: PseudoBR %bb.1 -+ bb.0: -+ liveins: $r4, $r5, $r6, $r7, $r8 -+ %0:gpr = COPY killed $r8 -+ %1:gpr = COPY killed $r7 -+ %2:gpr = COPY killed $r6 -+ %3:gpr = COPY killed $r5 -+ %4:gpr = COPY killed $r4 -+ %5:gpr = COPY $r0 -+ %6:gpr = COPY killed %5 -+ %7:gpr = ANDI killed %3, 1 -+ %8:gpr = ORI $r0, 1 -+ %9:gpr = ANDI killed %2, 1 -+ %10:gpr = ANDI killed %1, 1 -+ %11:gpr = ANDI killed %0, 1 -+ %12:gpr = COPY %6 -+ %13:gpr = COPY killed %6 -+ %14:gpr = IMPLICIT_DEF -+ bb.1: -+ %15:gpr = COPY killed %14 -+ %16:gpr = COPY killed %13 -+ %17:gpr = COPY killed %12 -+ %18:gpr = COPY %17 -+ %19:gpr = COPY %16 -+ %20:gpr = COPY killed %16 -+ %21:gpr = COPY killed %15 -+ bb.2: -+ successors: %bb.3, %bb.4 -+ %22:gpr = COPY killed %21 -+ %23:gpr = COPY killed %20 -+ %24:gpr = COPY killed %19 -+ %25:gpr = COPY killed %18 -+ BEQZ %7, %bb.4 -+ bb.3: -+ %26:gpr = COPY killed %24 -+ %27:gpr = COPY killed %23 -+ PseudoBR %bb.9 -+ bb.4: -+ %28:gpr = COPY killed %23 -+ bb.5: -+ successors: %bb.7(0x7c000000), %bb.6(0x04000000) -+ %29:gpr = COPY killed %28 -+ dead %30:gpr = LD_D $r0, 8 -+ dead %31:gpr = LD_D $r0, 0 -+ BNEZ %9, %bb.7 -+ bb.6: -+ %32:gpr = COPY $r0 -+ %33:gpr = COPY killed %32 -+ %34:gpr = COPY killed %33 -+ %35:gpr = COPY killed %22 -+ PseudoBR %bb.11 -+ bb.7: -+ successors: %bb.8(0x7c000000), %bb.10(0x04000000) -+ BEQZ %10, %bb.10 -+ PseudoBR %bb.8 -+ bb.8: -+ successors: %bb.9(0x04000000), %bb.5(0x7c000000) -+ %36:gpr = ADDI_D killed %29, 1 -+ %28:gpr = COPY %36 -+ %26:gpr = COPY %36 -+ %27:gpr = COPY killed %36 -+ BEQZ %11, %bb.5 -+ PseudoBR %bb.9 -+ bb.9: -+ %37:gpr = COPY killed %27 -+ %38:gpr = COPY killed %26 -+ %39:gpr = COPY $r0 -+ ST_B killed %39, %4, 0 -+ %40:gpr = COPY killed %25 -+ %41:gpr = COPY killed %38 -+ %42:gpr = COPY killed %37 -+ %43:gpr = COPY killed %22 -+ PseudoBR %bb.12 -+ bb.10: -+ %44:gpr = ADDI_D killed %29, 1 -+ %34:gpr = COPY %8 -+ %35:gpr = COPY killed %44 -+ bb.11: -+ %45:gpr = COPY killed %35 -+ %46:gpr = COPY killed %34 -+ %47:gpr = COPY $r0 -+ ST_D killed %47, %4, 0 -+ %40:gpr = COPY %45 -+ %41:gpr = COPY %46 -+ %42:gpr = COPY killed %46 -+ %43:gpr = COPY killed %45 -+ bb.12: -+ successors: %bb.2(0x7c000000), %bb.1(0x04000000) -+ %48:gpr = COPY killed %43 -+ %49:gpr = COPY killed %42 -+ %50:gpr = COPY killed %41 -+ %51:gpr = COPY killed %40 -+ %12:gpr = COPY %51 -+ %13:gpr = COPY %50 -+ %14:gpr = COPY %48 -+ %18:gpr = COPY killed %51 -+ %19:gpr = COPY killed %50 -+ %20:gpr = COPY killed %49 -+ %21:gpr = COPY killed %48 -+ BEQ %17, %8, %bb.2 -+ PseudoBR %bb.1 -+... 
--- -2.41.1 - diff --git a/0001-Driver-Give-devtoolset-path-precedence-over-Installe.patch b/0001-Driver-Give-devtoolset-path-precedence-over-Installe.patch new file mode 100644 index 0000000..92ab81c --- /dev/null +++ b/0001-Driver-Give-devtoolset-path-precedence-over-Installe.patch @@ -0,0 +1,41 @@ +From 73d3b4047d757ef35850e2cef38285b96be82f0f Mon Sep 17 00:00:00 2001 +From: Nikita Popov +Date: Tue, 23 May 2023 12:17:29 +0200 +Subject: [PATCH] [Driver] Give devtoolset path precedence over InstalledDir + +This is a followup to the change from c5fe10f365247c3dd9416b7ec8bad73a60b5946e. +While that commit correctly adds the bindir from devtoolset to the +path, the driver dir / install dir still comes first. This means +we'll still end up picking /usr/bin/ld rather than the one from +devtoolset. + +Unfortunately, I don't see any way to test this. In the environment +the tests are run, this would only result in a behavior difference +if there is an ld binary present in the LLVM build directory, which +isn't the case. + +Differential Revision: https://reviews.llvm.org/D151203 +--- + clang/lib/Driver/ToolChains/Linux.cpp | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp +index 853ff99d9fe5..aecabb46d4b9 100644 +--- a/clang/lib/Driver/ToolChains/Linux.cpp ++++ b/clang/lib/Driver/ToolChains/Linux.cpp +@@ -244,9 +244,9 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) + // With devtoolset on RHEL, we want to add a bin directory that is relative + // to the detected gcc install, because if we are using devtoolset gcc then + // we want to use other tools from devtoolset (e.g. ld) instead of the +- // standard system tools. +- PPaths.push_back(Twine(GCCInstallation.getParentLibPath() + +- "/../bin").str()); ++ // standard system tools. This should take precedence over InstalledDir. ++ PPaths.insert(PPaths.begin(), ++ Twine(GCCInstallation.getParentLibPath() + "/../bin").str()); + + if (Arch == llvm::Triple::arm || Arch == llvm::Triple::thumb) + ExtraOpts.push_back("-X"); +-- +2.40.1 + diff --git a/0001-Driver-Support-mcmodel-for-LoongArch-72514.patch b/0001-Driver-Support-mcmodel-for-LoongArch-72514.patch new file mode 100644 index 0000000..518f989 --- /dev/null +++ b/0001-Driver-Support-mcmodel-for-LoongArch-72514.patch @@ -0,0 +1,110 @@ +From 2cdfabe66e345304bf05e54a0127b65bc26ce8f5 Mon Sep 17 00:00:00 2001 +From: Lu Weining +Date: Thu, 30 Nov 2023 14:08:45 +0800 +Subject: [PATCH 01/23] [Driver] Support -mcmodel= for LoongArch (#72514) + +7e42545 rejects unsupported mcmodel options, but normal/medium/extreme +should be supported models for LoongArch according to [gcc +document](https://gcc.gnu.org/onlinedocs/gcc/LoongArch-Options.html). + +The mappings among `gcc`, `clang driver`, `clang cc1` and `LLVM (i.e. 
+llc --code-model=)` are: + +| gcc | clang driver | clang cc1 | LLVM | +| ------------- | ------------------ | ----------------- | -------------- | +| normal | normal | small | small | +| medium | medium | medium | medium | +| extreme | extreme | large | large | + +(cherry picked from commit 1296d20adfb0978afe38d67efab9818079d870ca) + +--- + clang/lib/Driver/ToolChains/Clang.cpp | 38 ++++++++++++++++++++------- + clang/test/Driver/mcmodel.c | 15 +++++++++++ + 2 files changed, 44 insertions(+), 9 deletions(-) + +diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp +index 777e21af98df..dd989c25567e 100644 +--- a/clang/lib/Driver/ToolChains/Clang.cpp ++++ b/clang/lib/Driver/ToolChains/Clang.cpp +@@ -5663,18 +5663,38 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, + + if (Arg *A = Args.getLastArg(options::OPT_mcmodel_EQ)) { + StringRef CM = A->getValue(); +- if (CM == "small" || CM == "kernel" || CM == "medium" || CM == "large" || +- CM == "tiny") { +- if (Triple.isOSAIX() && CM == "medium") +- CmdArgs.push_back("-mcmodel=large"); +- else if (Triple.isAArch64() && (CM == "kernel" || CM == "medium")) ++ if (Triple.isLoongArch()) { ++ bool Ok = false; ++ if (CM == "extreme" && ++ Args.hasFlagNoClaim(options::OPT_fplt, options::OPT_fno_plt, false)) ++ D.Diag(diag::err_drv_argument_not_allowed_with) ++ << A->getAsString(Args) << "-fplt"; ++ Ok = CM == "normal" || CM == "medium" || CM == "extreme"; ++ // Convert to LLVM recognizable names. ++ if (Ok) { ++ CM = llvm::StringSwitch(CM) ++ .Case("normal", "small") ++ .Case("extreme", "large") ++ .Default(CM); ++ CmdArgs.push_back(Args.MakeArgString("-mcmodel=" + CM)); ++ } else { + D.Diag(diag::err_drv_invalid_argument_to_option) + << CM << A->getOption().getName(); +- else +- A->render(Args, CmdArgs); ++ } + } else { +- D.Diag(diag::err_drv_invalid_argument_to_option) +- << CM << A->getOption().getName(); ++ if (CM == "small" || CM == "kernel" || CM == "medium" || CM == "large" || ++ CM == "tiny") { ++ if (Triple.isOSAIX() && CM == "medium") ++ CmdArgs.push_back("-mcmodel=large"); ++ else if (Triple.isAArch64() && (CM == "kernel" || CM == "medium")) ++ D.Diag(diag::err_drv_invalid_argument_to_option) ++ << CM << A->getOption().getName(); ++ else ++ A->render(Args, CmdArgs); ++ } else { ++ D.Diag(diag::err_drv_invalid_argument_to_option) ++ << CM << A->getOption().getName(); ++ } + } + } + +diff --git a/clang/test/Driver/mcmodel.c b/clang/test/Driver/mcmodel.c +index 63b432036159..4aada126cf06 100644 +--- a/clang/test/Driver/mcmodel.c ++++ b/clang/test/Driver/mcmodel.c +@@ -8,6 +8,14 @@ + // RUN: not %clang -c -mcmodel=lager %s 2>&1 | FileCheck --check-prefix=INVALID %s + // RUN: not %clang -c --target=aarch64 -mcmodel=medium %s 2>&1 | FileCheck --check-prefix=AARCH64-MEDIUM %s + // RUN: not %clang -c --target=aarch64 -mcmodel=kernel %s 2>&1 | FileCheck --check-prefix=AARCH64-KERNEL %s ++// RUN: %clang --target=loongarch64 -### -S -mcmodel=normal %s 2>&1 | FileCheck --check-prefix=SMALL %s ++// RUN: %clang --target=loongarch64 -### -S -mcmodel=medium %s 2>&1 | FileCheck --check-prefix=MEDIUM %s ++// RUN: %clang --target=loongarch64 -### -S -mcmodel=extreme %s 2>&1 | FileCheck --check-prefix=LARGE %s ++// RUN: not %clang -c --target=loongarch64 -mcmodel=tiny %s 2>&1 | FileCheck --check-prefix=ERR-LOONGARCH64-TINY %s ++// RUN: not %clang -c --target=loongarch64 -mcmodel=small %s 2>&1 | FileCheck --check-prefix=ERR-LOONGARCH64-SMALL %s ++// RUN: not %clang -c --target=loongarch64 -mcmodel=kernel %s 
2>&1 | FileCheck --check-prefix=ERR-LOONGARCH64-KERNEL %s ++// RUN: not %clang -c --target=loongarch64 -mcmodel=large %s 2>&1 | FileCheck --check-prefix=ERR-LOONGARCH64-LARGE %s ++// RUN: not %clang -c --target=loongarch64 -mcmodel=extreme -fplt %s 2>&1 | FileCheck --check-prefix=ERR-LOONGARCH64-PLT-EXTREME %s + + // TINY: "-mcmodel=tiny" + // SMALL: "-mcmodel=small" +@@ -20,3 +28,10 @@ + + // AARCH64-MEDIUM: error: invalid argument 'medium' to -mcmodel= + // AARCH64-KERNEL: error: invalid argument 'kernel' to -mcmodel= ++ ++// ERR-LOONGARCH64-TINY: error: invalid argument 'tiny' to -mcmodel= ++// ERR-LOONGARCH64-SMALL: error: invalid argument 'small' to -mcmodel= ++// ERR-LOONGARCH64-KERNEL: error: invalid argument 'kernel' to -mcmodel= ++// ERR-LOONGARCH64-LARGE: error: invalid argument 'large' to -mcmodel= ++ ++// ERR-LOONGARCH64-PLT-EXTREME: error: invalid argument '-mcmodel=extreme' not allowed with '-fplt' +-- +2.20.1 + diff --git a/0001-ItaniumMangle-Add-substitutions-for-record-types-whe.patch b/0001-ItaniumMangle-Add-substitutions-for-record-types-whe.patch new file mode 100644 index 0000000..b60a5b0 --- /dev/null +++ b/0001-ItaniumMangle-Add-substitutions-for-record-types-whe.patch @@ -0,0 +1,121 @@ +From 087299fb711be190128a8ab77c84f1b4db0ea9da Mon Sep 17 00:00:00 2001 +From: tcwzxx +Date: Sun, 29 Sep 2024 11:43:28 +0800 +Subject: [PATCH] [ItaniumMangle] Add substitutions for record types when + mangling vtables (#109970) + +Fix #108015 + +The `mangleNameOrStandardSubstitution` function does not add the RD type +into the substitution, which causes the mangling of the \ to +be incorrect. +Rename `mangleNameOrStandardSubstitution` to `mangleCXXRecordDecl` and add `Record` as a substitution +--- + clang/lib/AST/ItaniumMangle.cpp | 20 ++++++++++++-------- + clang/test/CodeGenCXX/mangle-subst.cpp | 26 ++++++++++++++++++++++++++ + 2 files changed, 38 insertions(+), 8 deletions(-) + +diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp +index f08286a0d4ba..389c3b7db5c9 100644 +--- a/clang/lib/AST/ItaniumMangle.cpp ++++ b/clang/lib/AST/ItaniumMangle.cpp +@@ -448,7 +448,7 @@ public: + void mangleSeqID(unsigned SeqID); + void mangleName(GlobalDecl GD); + void mangleType(QualType T); +- void mangleNameOrStandardSubstitution(const NamedDecl *ND); ++ void mangleCXXRecordDecl(const CXXRecordDecl *Record); + void mangleLambdaSig(const CXXRecordDecl *Lambda); + void mangleModuleNamePrefix(StringRef Name, bool IsPartition = false); + +@@ -2910,9 +2910,13 @@ void CXXNameMangler::mangleType(QualType T) { + addSubstitution(T); + } + +-void CXXNameMangler::mangleNameOrStandardSubstitution(const NamedDecl *ND) { +- if (!mangleStandardSubstitution(ND)) +- mangleName(ND); ++void CXXNameMangler::mangleCXXRecordDecl(const CXXRecordDecl *Record) { ++ if (mangleSubstitution(Record)) ++ return; ++ mangleName(Record); ++ if (isCompatibleWith(LangOptions::ClangABI::Ver19)) ++ return; ++ addSubstitution(Record); + } + + void CXXNameMangler::mangleType(const BuiltinType *T) { +@@ -6678,7 +6682,7 @@ void ItaniumMangleContextImpl::mangleCXXVTable(const CXXRecordDecl *RD, + // ::= TV # virtual table + CXXNameMangler Mangler(*this, Out); + Mangler.getStream() << "_ZTV"; +- Mangler.mangleNameOrStandardSubstitution(RD); ++ Mangler.mangleCXXRecordDecl(RD); + } + + void ItaniumMangleContextImpl::mangleCXXVTT(const CXXRecordDecl *RD, +@@ -6686,7 +6690,7 @@ void ItaniumMangleContextImpl::mangleCXXVTT(const CXXRecordDecl *RD, + // ::= TT # VTT structure + CXXNameMangler Mangler(*this, Out); + 
Mangler.getStream() << "_ZTT"; +- Mangler.mangleNameOrStandardSubstitution(RD); ++ Mangler.mangleCXXRecordDecl(RD); + } + + void ItaniumMangleContextImpl::mangleCXXCtorVTable(const CXXRecordDecl *RD, +@@ -6696,10 +6700,10 @@ void ItaniumMangleContextImpl::mangleCXXCtorVTable(const CXXRecordDecl *RD, + // ::= TC _ + CXXNameMangler Mangler(*this, Out); + Mangler.getStream() << "_ZTC"; +- Mangler.mangleNameOrStandardSubstitution(RD); ++ Mangler.mangleCXXRecordDecl(RD); + Mangler.getStream() << Offset; + Mangler.getStream() << '_'; +- Mangler.mangleNameOrStandardSubstitution(Type); ++ Mangler.mangleCXXRecordDecl(Type); + } + + void ItaniumMangleContextImpl::mangleCXXRTTI(QualType Ty, raw_ostream &Out) { +diff --git a/clang/test/CodeGenCXX/mangle-subst.cpp b/clang/test/CodeGenCXX/mangle-subst.cpp +index 20f33a72fff8..43b48b6c72f8 100644 +--- a/clang/test/CodeGenCXX/mangle-subst.cpp ++++ b/clang/test/CodeGenCXX/mangle-subst.cpp +@@ -1,5 +1,8 @@ + // RUN: %clang_cc1 -emit-llvm %s -o - -triple=x86_64-apple-darwin9 | FileCheck %s + ++//CHECK: @_ZTCN16MangleCtorVTable4InstE0_NS_1A4ImplINS1_4WrapEEE ++//CHECK-CLANG-19: @_ZTCN16MangleCtorVTable4InstE0_NS_1A4ImplINS0_4WrapEEE ++ + struct X {}; + + // CHECK-LABEL: define{{.*}} void @_Z1f1XS_( +@@ -96,3 +99,26 @@ typename X::template Y::type f(typename X::template Y::type2) { retu + // CHECK: @_ZN12ManglePrefix1fIiEENS_1XIT_E1YIS2_E4typeENS5_5type2E + template int f(int); + } ++ ++namespace MangleCtorVTable { ++namespace A { ++ ++class VBase { ++ public: ++ virtual ~VBase() {}; ++}; ++ ++struct Wrap {}; ++ ++template ++class Impl : public virtual VBase { ++ public: ++}; ++ ++} // namespace A ++ ++struct Inst : public A::Impl {}; ++ ++void Test() { Inst a; } ++ ++} +-- +2.27.0 + diff --git a/0001-LoongArch-Add-support-for-OpenCloudOS-triple.patch b/0001-LoongArch-Add-support-for-OpenCloudOS-triple.patch new file mode 100644 index 0000000..3f04c2a --- /dev/null +++ b/0001-LoongArch-Add-support-for-OpenCloudOS-triple.patch @@ -0,0 +1,26 @@ +From f30dd9ba58be5615e3b95982b316244df8a43aae Mon Sep 17 00:00:00 2001 +From: Ami-zhang +Date: Tue, 28 May 2024 11:20:27 +0800 +Subject: [PATCH] [LoongArch] Add support for OpenCloudOS triple + +--- + clang/lib/Driver/ToolChains/Gnu.cpp | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp +index 40038dce47d8..e9e20047654e 100644 +--- a/clang/lib/Driver/ToolChains/Gnu.cpp ++++ b/clang/lib/Driver/ToolChains/Gnu.cpp +@@ -2341,7 +2341,8 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( + + static const char *const LoongArch64LibDirs[] = {"/lib64", "/lib"}; + static const char *const LoongArch64Triples[] = { +- "loongarch64-linux-gnu", "loongarch64-unknown-linux-gnu"}; ++ "loongarch64-linux-gnu", "loongarch64-unknown-linux-gnu", ++ "loongarch64-OpenCloudOS-linux"}; + + static const char *const M68kLibDirs[] = {"/lib"}; + static const char *const M68kTriples[] = { +-- +2.20.1 + diff --git a/0001-LoongArch-Improve-codegen-for-atomic-ops-67391.patch b/0001-LoongArch-Improve-codegen-for-atomic-ops-67391.patch new file mode 100644 index 0000000..b9975ae --- /dev/null +++ b/0001-LoongArch-Improve-codegen-for-atomic-ops-67391.patch @@ -0,0 +1,1449 @@ +From 0a14b9c4408974055f7ab42501d45073808077eb Mon Sep 17 00:00:00 2001 +From: hev +Date: Wed, 11 Oct 2023 10:24:18 +0800 +Subject: [PATCH 1/7] [LoongArch] Improve codegen for atomic ops (#67391) + +This PR improves memory barriers generated by atomic operations. 
+ +Memory barrier semantics of LL/SC: +``` +LL: + +SC: + +``` + +Changes: +* Remove unnecessary memory barriers before LL and between LL/SC. +* Fix acquire semantics. (If the SC instruction is not executed, then +the guarantee of acquiring semantics cannot be ensured. Therefore, an +acquire barrier needs to be generated when memory ordering includes an +acquire operation.) + +(cherry picked from commit 203ba238e33c570dba6cbcf247f1668bb2a13c26) + +--- + .../LoongArchExpandAtomicPseudoInsts.cpp | 50 +-- + .../Target/LoongArch/LoongArchInstrInfo.td | 24 +- + .../LoongArch/atomicrmw-uinc-udec-wrap.ll | 24 +- + .../ir-instruction/atomic-cmpxchg.ll | 376 ++++++++++++++++-- + .../LoongArch/ir-instruction/atomicrmw-fp.ll | 24 +- + .../ir-instruction/atomicrmw-minmax.ll | 24 -- + .../LoongArch/ir-instruction/atomicrmw.ll | 31 -- + llvm/unittests/Target/LoongArch/InstSizes.cpp | 2 +- + 8 files changed, 407 insertions(+), 148 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp +index 51df0463e235..eb78ef065b21 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp +@@ -153,18 +153,12 @@ static void doAtomicBinOpExpansion(const LoongArchInstrInfo *TII, + Register ScratchReg = MI.getOperand(1).getReg(); + Register AddrReg = MI.getOperand(2).getReg(); + Register IncrReg = MI.getOperand(3).getReg(); +- AtomicOrdering Ordering = +- static_cast(MI.getOperand(4).getImm()); + + // .loop: +- // if(Ordering != AtomicOrdering::Monotonic) +- // dbar 0 + // ll.[w|d] dest, (addr) + // binop scratch, dest, val + // sc.[w|d] scratch, scratch, (addr) + // beqz scratch, loop +- if (Ordering != AtomicOrdering::Monotonic) +- BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0); + BuildMI(LoopMBB, DL, + TII->get(Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg) + .addReg(AddrReg) +@@ -251,12 +245,8 @@ static void doMaskedAtomicBinOpExpansion( + Register AddrReg = MI.getOperand(2).getReg(); + Register IncrReg = MI.getOperand(3).getReg(); + Register MaskReg = MI.getOperand(4).getReg(); +- AtomicOrdering Ordering = +- static_cast(MI.getOperand(5).getImm()); + + // .loop: +- // if(Ordering != AtomicOrdering::Monotonic) +- // dbar 0 + // ll.w destreg, (alignedaddr) + // binop scratch, destreg, incr + // xor scratch, destreg, scratch +@@ -264,8 +254,6 @@ static void doMaskedAtomicBinOpExpansion( + // xor scratch, destreg, scratch + // sc.w scratch, scratch, (alignedaddr) + // beqz scratch, loop +- if (Ordering != AtomicOrdering::Monotonic) +- BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0); + BuildMI(LoopMBB, DL, TII->get(LoongArch::LL_W), DestReg) + .addReg(AddrReg) + .addImm(0); +@@ -372,23 +360,20 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp( + auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); +- auto TailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + + // Insert new MBBs. 
+ MF->insert(++MBB.getIterator(), LoopHeadMBB); + MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB); + MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB); +- MF->insert(++LoopTailMBB->getIterator(), TailMBB); +- MF->insert(++TailMBB->getIterator(), DoneMBB); ++ MF->insert(++LoopTailMBB->getIterator(), DoneMBB); + + // Set up successors and transfer remaining instructions to DoneMBB. + LoopHeadMBB->addSuccessor(LoopIfBodyMBB); + LoopHeadMBB->addSuccessor(LoopTailMBB); + LoopIfBodyMBB->addSuccessor(LoopTailMBB); + LoopTailMBB->addSuccessor(LoopHeadMBB); +- LoopTailMBB->addSuccessor(TailMBB); +- TailMBB->addSuccessor(DoneMBB); ++ LoopTailMBB->addSuccessor(DoneMBB); + DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end()); + DoneMBB->transferSuccessors(&MBB); + MBB.addSuccessor(LoopHeadMBB); +@@ -402,11 +387,9 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp( + + // + // .loophead: +- // dbar 0 + // ll.w destreg, (alignedaddr) + // and scratch2, destreg, mask + // move scratch1, destreg +- BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::DBAR)).addImm(0); + BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::LL_W), DestReg) + .addReg(AddrReg) + .addImm(0); +@@ -463,7 +446,6 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp( + // .looptail: + // sc.w scratch1, scratch1, (addr) + // beqz scratch1, loop +- // dbar 0x700 + BuildMI(LoopTailMBB, DL, TII->get(LoongArch::SC_W), Scratch1Reg) + .addReg(Scratch1Reg) + .addReg(AddrReg) +@@ -472,10 +454,6 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp( + .addReg(Scratch1Reg) + .addMBB(LoopHeadMBB); + +- // .tail: +- // dbar 0x700 +- BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0x700); +- + NextMBBI = MBB.end(); + MI.eraseFromParent(); + +@@ -483,7 +461,6 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp( + computeAndAddLiveIns(LiveRegs, *LoopHeadMBB); + computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB); + computeAndAddLiveIns(LiveRegs, *LoopTailMBB); +- computeAndAddLiveIns(LiveRegs, *TailMBB); + computeAndAddLiveIns(LiveRegs, *DoneMBB); + + return true; +@@ -535,12 +512,10 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg( + .addReg(CmpValReg) + .addMBB(TailMBB); + // .looptail: +- // dbar 0 + // move scratch, newval + // sc.[w|d] scratch, scratch, (addr) + // beqz scratch, loophead + // b done +- BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0); + BuildMI(LoopTailMBB, DL, TII->get(LoongArch::OR), ScratchReg) + .addReg(NewValReg) + .addReg(LoongArch::R0); +@@ -573,13 +548,11 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg( + .addMBB(TailMBB); + + // .looptail: +- // dbar 0 + // andn scratch, dest, mask + // or scratch, scratch, newval + // sc.[w|d] scratch, scratch, (addr) + // beqz scratch, loophead + // b done +- BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0); + BuildMI(LoopTailMBB, DL, TII->get(LoongArch::ANDN), ScratchReg) + .addReg(DestReg) + .addReg(MaskReg); +@@ -598,9 +571,24 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg( + BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB); + } + ++ AtomicOrdering Ordering = ++ static_cast(MI.getOperand(IsMasked ? 
6 : 5).getImm()); ++ int hint; ++ ++ switch (Ordering) { ++ case AtomicOrdering::Acquire: ++ case AtomicOrdering::AcquireRelease: ++ case AtomicOrdering::SequentiallyConsistent: ++ // TODO: acquire ++ hint = 0; ++ break; ++ default: ++ hint = 0x700; ++ } ++ + // .tail: +- // dbar 0x700 +- BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0x700); ++ // dbar 0x700 | acquire ++ BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(hint); + + NextMBBI = MBB.end(); + MI.eraseFromParent(); +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +index 05ae36a9781d..a9b0db30c2f6 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +@@ -1731,7 +1731,7 @@ def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMMinMax; + + class PseudoCmpXchg + : Pseudo<(outs GPR:$res, GPR:$scratch), +- (ins GPR:$addr, GPR:$cmpval, GPR:$newval)> { ++ (ins GPR:$addr, GPR:$cmpval, GPR:$newval, grlenimm:$ordering)> { + let Constraints = "@earlyclobber $res,@earlyclobber $scratch"; + let mayLoad = 1; + let mayStore = 1; +@@ -1821,14 +1821,28 @@ def : AtomicPat; + +-def : Pat<(atomic_cmp_swap_64 GPR:$addr, GPR:$cmp, GPR:$new), +- (PseudoCmpXchg64 GPR:$addr, GPR:$cmp, GPR:$new)>; ++// Ordering constants must be kept in sync with the AtomicOrdering enum in ++// AtomicOrdering.h. ++multiclass PseudoCmpXchgPat { ++ def : Pat<(vt (!cast(Op#"_monotonic") GPR:$addr, GPR:$cmp, GPR:$new)), ++ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 2)>; ++ def : Pat<(vt (!cast(Op#"_acquire") GPR:$addr, GPR:$cmp, GPR:$new)), ++ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 4)>; ++ def : Pat<(vt (!cast(Op#"_release") GPR:$addr, GPR:$cmp, GPR:$new)), ++ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 5)>; ++ def : Pat<(vt (!cast(Op#"_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new)), ++ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 6)>; ++ def : Pat<(vt (!cast(Op#"_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new)), ++ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>; ++} ++ ++defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32>; ++defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64, i64>; + def : Pat<(int_loongarch_masked_cmpxchg_i64 + GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering), + (PseudoMaskedCmpXchg32 + GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>; +-def : Pat<(atomic_cmp_swap_32 GPR:$addr, GPR:$cmp, GPR:$new), +- (PseudoCmpXchg32 GPR:$addr, GPR:$cmp, GPR:$new)>; + + def : PseudoMaskedAMMinMaxPat; +diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll +index f11af8fe6528..32106886c783 100644 +--- a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll ++++ b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll +@@ -34,14 +34,13 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { + ; LA64-NEXT: bne $a5, $a3, .LBB0_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB0_3 Depth=2 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $a7, $a6 + ; LA64-NEXT: sc.w $a7, $a2, 0 + ; LA64-NEXT: beqz $a7, .LBB0_3 + ; LA64-NEXT: b .LBB0_6 + ; LA64-NEXT: .LBB0_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1 +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB0_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1 + ; LA64-NEXT: addi.w $a6, $a3, 0 +@@ -88,14 +87,13 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { + ; LA64-NEXT: bne $a5, $a3, 
.LBB1_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB1_3 Depth=2 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $a7, $a6 + ; LA64-NEXT: sc.w $a7, $a2, 0 + ; LA64-NEXT: beqz $a7, .LBB1_3 + ; LA64-NEXT: b .LBB1_6 + ; LA64-NEXT: .LBB1_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1 +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB1_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1 + ; LA64-NEXT: addi.w $a6, $a3, 0 +@@ -129,14 +127,13 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { + ; LA64-NEXT: bne $a1, $a3, .LBB2_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB2_3 Depth=2 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $a6, $a5 + ; LA64-NEXT: sc.w $a6, $a0, 0 + ; LA64-NEXT: beqz $a6, .LBB2_3 + ; LA64-NEXT: b .LBB2_6 + ; LA64-NEXT: .LBB2_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1 +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB2_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1 + ; LA64-NEXT: move $a3, $a1 +@@ -168,14 +165,13 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { + ; LA64-NEXT: bne $a2, $a3, .LBB3_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB3_3 Depth=2 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $a5, $a4 + ; LA64-NEXT: sc.d $a5, $a0, 0 + ; LA64-NEXT: beqz $a5, .LBB3_3 + ; LA64-NEXT: b .LBB3_6 + ; LA64-NEXT: .LBB3_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB3_1 Depth=1 +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB3_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB3_1 Depth=1 + ; LA64-NEXT: bne $a2, $a3, .LBB3_1 +@@ -224,14 +220,13 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { + ; LA64-NEXT: bne $a6, $a3, .LBB4_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB4_3 Depth=2 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $t0, $a7 + ; LA64-NEXT: sc.w $t0, $a2, 0 + ; LA64-NEXT: beqz $t0, .LBB4_3 + ; LA64-NEXT: b .LBB4_6 + ; LA64-NEXT: .LBB4_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1 +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB4_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1 + ; LA64-NEXT: addi.w $a7, $a3, 0 +@@ -283,14 +278,13 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { + ; LA64-NEXT: bne $a6, $a3, .LBB5_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB5_3 Depth=2 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $t0, $a7 + ; LA64-NEXT: sc.w $t0, $a2, 0 + ; LA64-NEXT: beqz $t0, .LBB5_3 + ; LA64-NEXT: b .LBB5_6 + ; LA64-NEXT: .LBB5_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1 +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB5_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1 + ; LA64-NEXT: addi.w $a7, $a3, 0 +@@ -329,14 +323,13 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { + ; LA64-NEXT: bne $a2, $a4, .LBB6_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB6_3 Depth=2 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $a7, $a6 + ; LA64-NEXT: sc.w $a7, $a0, 0 + ; LA64-NEXT: beqz $a7, .LBB6_3 + ; LA64-NEXT: b .LBB6_6 + ; LA64-NEXT: .LBB6_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB6_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 +@@ -373,14 
+366,13 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { + ; LA64-NEXT: bne $a2, $a3, .LBB7_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB7_3 Depth=2 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $a5, $a4 + ; LA64-NEXT: sc.d $a5, $a0, 0 + ; LA64-NEXT: beqz $a5, .LBB7_3 + ; LA64-NEXT: b .LBB7_6 + ; LA64-NEXT: .LBB7_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB7_1 Depth=1 +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB7_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB7_1 Depth=1 + ; LA64-NEXT: bne $a2, $a3, .LBB7_1 +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +index 76e51fe7d3e8..1ac20d10e587 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +@@ -21,14 +21,13 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { + ; LA64-NEXT: and $a5, $a4, $a0 + ; LA64-NEXT: bne $a5, $a1, .LBB0_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: andn $a5, $a4, $a0 + ; LA64-NEXT: or $a5, $a5, $a2 + ; LA64-NEXT: sc.w $a5, $a3, 0 + ; LA64-NEXT: beqz $a5, .LBB0_1 + ; LA64-NEXT: b .LBB0_4 + ; LA64-NEXT: .LBB0_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB0_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire +@@ -56,14 +55,13 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind + ; LA64-NEXT: and $a5, $a4, $a0 + ; LA64-NEXT: bne $a5, $a1, .LBB1_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: andn $a5, $a4, $a0 + ; LA64-NEXT: or $a5, $a5, $a2 + ; LA64-NEXT: sc.w $a5, $a3, 0 + ; LA64-NEXT: beqz $a5, .LBB1_1 + ; LA64-NEXT: b .LBB1_4 + ; LA64-NEXT: .LBB1_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB1_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire +@@ -77,13 +75,12 @@ define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind + ; LA64-NEXT: ll.w $a3, $a0, 0 + ; LA64-NEXT: bne $a3, $a1, .LBB2_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.w $a4, $a0, 0 + ; LA64-NEXT: beqz $a4, .LBB2_1 + ; LA64-NEXT: b .LBB2_4 + ; LA64-NEXT: .LBB2_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB2_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire +@@ -97,13 +94,12 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind + ; LA64-NEXT: ll.d $a3, $a0, 0 + ; LA64-NEXT: bne $a3, $a1, .LBB3_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.d $a4, $a0, 0 + ; LA64-NEXT: beqz $a4, .LBB3_1 + ; LA64-NEXT: b .LBB3_4 + ; LA64-NEXT: .LBB3_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB3_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire +@@ -130,14 +126,13 @@ define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind + ; LA64-NEXT: and $a6, $a5, $a4 + ; LA64-NEXT: bne $a6, $a1, .LBB4_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: andn $a6, $a5, $a4 + ; LA64-NEXT: or $a6, $a6, $a2 + ; LA64-NEXT: sc.w $a6, $a3, 0 + ; LA64-NEXT: beqz $a6, .LBB4_1 + ; 
LA64-NEXT: b .LBB4_4 + ; LA64-NEXT: .LBB4_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB4_4: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret +@@ -167,14 +162,13 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou + ; LA64-NEXT: and $a6, $a5, $a4 + ; LA64-NEXT: bne $a6, $a1, .LBB5_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: andn $a6, $a5, $a4 + ; LA64-NEXT: or $a6, $a6, $a2 + ; LA64-NEXT: sc.w $a6, $a3, 0 + ; LA64-NEXT: beqz $a6, .LBB5_1 + ; LA64-NEXT: b .LBB5_4 + ; LA64-NEXT: .LBB5_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB5_4: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret +@@ -190,13 +184,12 @@ define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nou + ; LA64-NEXT: ll.w $a3, $a0, 0 + ; LA64-NEXT: bne $a3, $a1, .LBB6_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.w $a4, $a0, 0 + ; LA64-NEXT: beqz $a4, .LBB6_1 + ; LA64-NEXT: b .LBB6_4 + ; LA64-NEXT: .LBB6_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB6_4: + ; LA64-NEXT: move $a0, $a3 + ; LA64-NEXT: ret +@@ -212,13 +205,12 @@ define i64 @cmpxchg_i64_acquire_acquire_reti64(ptr %ptr, i64 %cmp, i64 %val) nou + ; LA64-NEXT: ll.d $a3, $a0, 0 + ; LA64-NEXT: bne $a3, $a1, .LBB7_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.d $a4, $a0, 0 + ; LA64-NEXT: beqz $a4, .LBB7_1 + ; LA64-NEXT: b .LBB7_4 + ; LA64-NEXT: .LBB7_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB7_4: + ; LA64-NEXT: move $a0, $a3 + ; LA64-NEXT: ret +@@ -247,14 +239,13 @@ define i1 @cmpxchg_i8_acquire_acquire_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind + ; LA64-NEXT: and $a6, $a5, $a2 + ; LA64-NEXT: bne $a6, $a1, .LBB8_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: andn $a6, $a5, $a2 + ; LA64-NEXT: or $a6, $a6, $a0 + ; LA64-NEXT: sc.w $a6, $a3, 0 + ; LA64-NEXT: beqz $a6, .LBB8_1 + ; LA64-NEXT: b .LBB8_4 + ; LA64-NEXT: .LBB8_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB8_4: + ; LA64-NEXT: and $a0, $a5, $a4 + ; LA64-NEXT: addi.w $a0, $a0, 0 +@@ -287,14 +278,13 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw + ; LA64-NEXT: and $a6, $a5, $a2 + ; LA64-NEXT: bne $a6, $a1, .LBB9_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: andn $a6, $a5, $a2 + ; LA64-NEXT: or $a6, $a6, $a0 + ; LA64-NEXT: sc.w $a6, $a3, 0 + ; LA64-NEXT: beqz $a6, .LBB9_1 + ; LA64-NEXT: b .LBB9_4 + ; LA64-NEXT: .LBB9_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB9_4: + ; LA64-NEXT: and $a0, $a5, $a4 + ; LA64-NEXT: addi.w $a0, $a0, 0 +@@ -313,13 +303,12 @@ define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounw + ; LA64-NEXT: ll.w $a3, $a0, 0 + ; LA64-NEXT: bne $a3, $a1, .LBB10_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.w $a4, $a0, 0 + ; LA64-NEXT: beqz $a4, .LBB10_1 + ; LA64-NEXT: b .LBB10_4 + ; LA64-NEXT: .LBB10_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB10_4: + ; LA64-NEXT: addi.w $a0, $a1, 0 + ; LA64-NEXT: xor $a0, $a3, $a0 +@@ -337,13 +326,12 @@ define i1 @cmpxchg_i64_acquire_acquire_reti1(ptr %ptr, i64 %cmp, i64 %val) nounw + ; LA64-NEXT: ll.d 
$a3, $a0, 0 + ; LA64-NEXT: bne $a3, $a1, .LBB11_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.d $a4, $a0, 0 + ; LA64-NEXT: beqz $a4, .LBB11_1 + ; LA64-NEXT: b .LBB11_4 + ; LA64-NEXT: .LBB11_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB11_4: + ; LA64-NEXT: xor $a0, $a3, $a1 + ; LA64-NEXT: sltui $a0, $a0, 1 +@@ -352,3 +340,343 @@ define i1 @cmpxchg_i64_acquire_acquire_reti1(ptr %ptr, i64 %cmp, i64 %val) nounw + %res = extractvalue { i64, i1 } %tmp, 1 + ret i1 %res + } ++ ++define void @cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ++; LA64-LABEL: cmpxchg_i8_monotonic_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a3, $zero, -4 ++; LA64-NEXT: and $a3, $a0, $a3 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: andi $a2, $a2, 255 ++; LA64-NEXT: sll.w $a2, $a2, $a0 ++; LA64-NEXT: ori $a4, $zero, 255 ++; LA64-NEXT: sll.w $a0, $a4, $a0 ++; LA64-NEXT: addi.w $a0, $a0, 0 ++; LA64-NEXT: addi.w $a2, $a2, 0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a3, 0 ++; LA64-NEXT: and $a5, $a4, $a0 ++; LA64-NEXT: bne $a5, $a1, .LBB12_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 ++; LA64-NEXT: andn $a5, $a4, $a0 ++; LA64-NEXT: or $a5, $a5, $a2 ++; LA64-NEXT: sc.w $a5, $a3, 0 ++; LA64-NEXT: beqz $a5, .LBB12_1 ++; LA64-NEXT: b .LBB12_4 ++; LA64-NEXT: .LBB12_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB12_4: ++; LA64-NEXT: ret ++ %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic ++ ret void ++} ++ ++define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwind { ++; LA64-LABEL: cmpxchg_i16_monotonic_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a3, $zero, -4 ++; LA64-NEXT: and $a3, $a0, $a3 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0 ++; LA64-NEXT: sll.w $a2, $a2, $a0 ++; LA64-NEXT: lu12i.w $a4, 15 ++; LA64-NEXT: ori $a4, $a4, 4095 ++; LA64-NEXT: sll.w $a0, $a4, $a0 ++; LA64-NEXT: addi.w $a0, $a0, 0 ++; LA64-NEXT: addi.w $a2, $a2, 0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a3, 0 ++; LA64-NEXT: and $a5, $a4, $a0 ++; LA64-NEXT: bne $a5, $a1, .LBB13_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 ++; LA64-NEXT: andn $a5, $a4, $a0 ++; LA64-NEXT: or $a5, $a5, $a2 ++; LA64-NEXT: sc.w $a5, $a3, 0 ++; LA64-NEXT: beqz $a5, .LBB13_1 ++; LA64-NEXT: b .LBB13_4 ++; LA64-NEXT: .LBB13_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB13_4: ++; LA64-NEXT: ret ++ %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic ++ ret void ++} ++ ++define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { ++; LA64-LABEL: cmpxchg_i32_monotonic_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a3, $a0, 0 ++; LA64-NEXT: bne $a3, $a1, .LBB14_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 ++; LA64-NEXT: move $a4, $a2 ++; LA64-NEXT: sc.w $a4, $a0, 0 ++; LA64-NEXT: beqz $a4, .LBB14_1 ++; LA64-NEXT: b .LBB14_4 ++; LA64-NEXT: .LBB14_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB14_4: ++; LA64-NEXT: ret ++ %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic ++ ret void ++} ++ ++define void 
@cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind { ++; LA64-LABEL: cmpxchg_i64_monotonic_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.d $a3, $a0, 0 ++; LA64-NEXT: bne $a3, $a1, .LBB15_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; LA64-NEXT: move $a4, $a2 ++; LA64-NEXT: sc.d $a4, $a0, 0 ++; LA64-NEXT: beqz $a4, .LBB15_1 ++; LA64-NEXT: b .LBB15_4 ++; LA64-NEXT: .LBB15_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB15_4: ++; LA64-NEXT: ret ++ %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic ++ ret void ++} ++ ++define i8 @cmpxchg_i8_monotonic_monotonic_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind { ++; LA64-LABEL: cmpxchg_i8_monotonic_monotonic_reti8: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a3, $zero, -4 ++; LA64-NEXT: and $a3, $a0, $a3 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a4, $zero, 255 ++; LA64-NEXT: sll.w $a4, $a4, $a0 ++; LA64-NEXT: addi.w $a4, $a4, 0 ++; LA64-NEXT: andi $a2, $a2, 255 ++; LA64-NEXT: sll.w $a2, $a2, $a0 ++; LA64-NEXT: addi.w $a2, $a2, 0 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a3, 0 ++; LA64-NEXT: and $a6, $a5, $a4 ++; LA64-NEXT: bne $a6, $a1, .LBB16_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 ++; LA64-NEXT: andn $a6, $a5, $a4 ++; LA64-NEXT: or $a6, $a6, $a2 ++; LA64-NEXT: sc.w $a6, $a3, 0 ++; LA64-NEXT: beqz $a6, .LBB16_1 ++; LA64-NEXT: b .LBB16_4 ++; LA64-NEXT: .LBB16_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB16_4: ++; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: ret ++ %tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic ++ %res = extractvalue { i8, i1 } %tmp, 0 ++ ret i8 %res ++} ++ ++define i16 @cmpxchg_i16_monotonic_monotonic_reti16(ptr %ptr, i16 %cmp, i16 %val) nounwind { ++; LA64-LABEL: cmpxchg_i16_monotonic_monotonic_reti16: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a3, $zero, -4 ++; LA64-NEXT: and $a3, $a0, $a3 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a4, 15 ++; LA64-NEXT: ori $a4, $a4, 4095 ++; LA64-NEXT: sll.w $a4, $a4, $a0 ++; LA64-NEXT: addi.w $a4, $a4, 0 ++; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0 ++; LA64-NEXT: sll.w $a2, $a2, $a0 ++; LA64-NEXT: addi.w $a2, $a2, 0 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a3, 0 ++; LA64-NEXT: and $a6, $a5, $a4 ++; LA64-NEXT: bne $a6, $a1, .LBB17_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 ++; LA64-NEXT: andn $a6, $a5, $a4 ++; LA64-NEXT: or $a6, $a6, $a2 ++; LA64-NEXT: sc.w $a6, $a3, 0 ++; LA64-NEXT: beqz $a6, .LBB17_1 ++; LA64-NEXT: b .LBB17_4 ++; LA64-NEXT: .LBB17_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB17_4: ++; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: ret ++ %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic ++ %res = extractvalue { i16, i1 } %tmp, 0 ++ ret i16 %res ++} ++ ++define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind { ++; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti32: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a3, $a0, 0 ++; LA64-NEXT: bne $a3, $a1, .LBB18_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 ++; LA64-NEXT: move $a4, $a2 ++; LA64-NEXT: sc.w $a4, $a0, 0 ++; LA64-NEXT: beqz 
$a4, .LBB18_1 ++; LA64-NEXT: b .LBB18_4 ++; LA64-NEXT: .LBB18_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB18_4: ++; LA64-NEXT: move $a0, $a3 ++; LA64-NEXT: ret ++ %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic ++ %res = extractvalue { i32, i1 } %tmp, 0 ++ ret i32 %res ++} ++ ++define i64 @cmpxchg_i64_monotonic_monotonic_reti64(ptr %ptr, i64 %cmp, i64 %val) nounwind { ++; LA64-LABEL: cmpxchg_i64_monotonic_monotonic_reti64: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.d $a3, $a0, 0 ++; LA64-NEXT: bne $a3, $a1, .LBB19_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 ++; LA64-NEXT: move $a4, $a2 ++; LA64-NEXT: sc.d $a4, $a0, 0 ++; LA64-NEXT: beqz $a4, .LBB19_1 ++; LA64-NEXT: b .LBB19_4 ++; LA64-NEXT: .LBB19_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB19_4: ++; LA64-NEXT: move $a0, $a3 ++; LA64-NEXT: ret ++ %tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic ++ %res = extractvalue { i64, i1 } %tmp, 0 ++ ret i64 %res ++} ++ ++define i1 @cmpxchg_i8_monotonic_monotonic_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind { ++; LA64-LABEL: cmpxchg_i8_monotonic_monotonic_reti1: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a3, $zero, -4 ++; LA64-NEXT: and $a3, $a0, $a3 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a4, $zero, 255 ++; LA64-NEXT: sll.w $a4, $a4, $a0 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: andi $a2, $a2, 255 ++; LA64-NEXT: sll.w $a0, $a2, $a0 ++; LA64-NEXT: addi.w $a0, $a0, 0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: addi.w $a2, $a4, 0 ++; LA64-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a3, 0 ++; LA64-NEXT: and $a6, $a5, $a2 ++; LA64-NEXT: bne $a6, $a1, .LBB20_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 ++; LA64-NEXT: andn $a6, $a5, $a2 ++; LA64-NEXT: or $a6, $a6, $a0 ++; LA64-NEXT: sc.w $a6, $a3, 0 ++; LA64-NEXT: beqz $a6, .LBB20_1 ++; LA64-NEXT: b .LBB20_4 ++; LA64-NEXT: .LBB20_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB20_4: ++; LA64-NEXT: and $a0, $a5, $a4 ++; LA64-NEXT: addi.w $a0, $a0, 0 ++; LA64-NEXT: xor $a0, $a1, $a0 ++; LA64-NEXT: sltui $a0, $a0, 1 ++; LA64-NEXT: ret ++ %tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic ++ %res = extractvalue { i8, i1 } %tmp, 1 ++ ret i1 %res ++} ++ ++define i1 @cmpxchg_i16_monotonic_monotonic_reti1(ptr %ptr, i16 %cmp, i16 %val) nounwind { ++; LA64-LABEL: cmpxchg_i16_monotonic_monotonic_reti1: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a3, $zero, -4 ++; LA64-NEXT: and $a3, $a0, $a3 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a4, 15 ++; LA64-NEXT: ori $a4, $a4, 4095 ++; LA64-NEXT: sll.w $a4, $a4, $a0 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0 ++; LA64-NEXT: sll.w $a0, $a2, $a0 ++; LA64-NEXT: addi.w $a0, $a0, 0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: addi.w $a2, $a4, 0 ++; LA64-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a3, 0 ++; LA64-NEXT: and $a6, $a5, $a2 ++; LA64-NEXT: bne $a6, $a1, .LBB21_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 ++; LA64-NEXT: andn $a6, $a5, $a2 ++; LA64-NEXT: or $a6, $a6, $a0 ++; LA64-NEXT: sc.w $a6, $a3, 0 ++; LA64-NEXT: beqz $a6, .LBB21_1 ++; LA64-NEXT: b .LBB21_4 ++; LA64-NEXT: .LBB21_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB21_4: ++; LA64-NEXT: and $a0, $a5, $a4 ++; LA64-NEXT: addi.w $a0, $a0, 0 ++; LA64-NEXT: xor $a0, $a1, $a0 ++; LA64-NEXT: sltui $a0, $a0, 1 
++; LA64-NEXT: ret ++ %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic ++ %res = extractvalue { i16, i1 } %tmp, 1 ++ ret i1 %res ++} ++ ++define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind { ++; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti1: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a3, $a0, 0 ++; LA64-NEXT: bne $a3, $a1, .LBB22_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 ++; LA64-NEXT: move $a4, $a2 ++; LA64-NEXT: sc.w $a4, $a0, 0 ++; LA64-NEXT: beqz $a4, .LBB22_1 ++; LA64-NEXT: b .LBB22_4 ++; LA64-NEXT: .LBB22_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB22_4: ++; LA64-NEXT: addi.w $a0, $a1, 0 ++; LA64-NEXT: xor $a0, $a3, $a0 ++; LA64-NEXT: sltui $a0, $a0, 1 ++; LA64-NEXT: ret ++ %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic ++ %res = extractvalue { i32, i1 } %tmp, 1 ++ ret i1 %res ++} ++ ++define i1 @cmpxchg_i64_monotonic_monotonic_reti1(ptr %ptr, i64 %cmp, i64 %val) nounwind { ++; LA64-LABEL: cmpxchg_i64_monotonic_monotonic_reti1: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.d $a3, $a0, 0 ++; LA64-NEXT: bne $a3, $a1, .LBB23_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 ++; LA64-NEXT: move $a4, $a2 ++; LA64-NEXT: sc.d $a4, $a0, 0 ++; LA64-NEXT: beqz $a4, .LBB23_1 ++; LA64-NEXT: b .LBB23_4 ++; LA64-NEXT: .LBB23_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB23_4: ++; LA64-NEXT: xor $a0, $a3, $a1 ++; LA64-NEXT: sltui $a0, $a0, 1 ++; LA64-NEXT: ret ++ %tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic ++ %res = extractvalue { i64, i1 } %tmp, 1 ++ ret i1 %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll +index 9767717395b6..9a29d67e9982 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll +@@ -25,14 +25,13 @@ define float @float_fadd_acquire(ptr %p) nounwind { + ; LA64F-NEXT: bne $a3, $a2, .LBB0_5 + ; LA64F-NEXT: # %bb.4: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB0_3 Depth=2 +-; LA64F-NEXT: dbar 0 + ; LA64F-NEXT: move $a4, $a1 + ; LA64F-NEXT: sc.w $a4, $a0, 0 + ; LA64F-NEXT: beqz $a4, .LBB0_3 + ; LA64F-NEXT: b .LBB0_6 + ; LA64F-NEXT: .LBB0_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1 +-; LA64F-NEXT: dbar 1792 ++; LA64F-NEXT: dbar 0 + ; LA64F-NEXT: .LBB0_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -61,14 +60,13 @@ define float @float_fadd_acquire(ptr %p) nounwind { + ; LA64D-NEXT: bne $a3, $a2, .LBB0_5 + ; LA64D-NEXT: # %bb.4: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB0_3 Depth=2 +-; LA64D-NEXT: dbar 0 + ; LA64D-NEXT: move $a4, $a1 + ; LA64D-NEXT: sc.w $a4, $a0, 0 + ; LA64D-NEXT: beqz $a4, .LBB0_3 + ; LA64D-NEXT: b .LBB0_6 + ; LA64D-NEXT: .LBB0_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1 +-; LA64D-NEXT: dbar 1792 ++; LA64D-NEXT: dbar 0 + ; LA64D-NEXT: .LBB0_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -101,14 +99,13 @@ define float @float_fsub_acquire(ptr %p) nounwind { + ; LA64F-NEXT: bne $a3, $a2, .LBB1_5 + ; LA64F-NEXT: # %bb.4: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB1_3 Depth=2 +-; LA64F-NEXT: dbar 0 + ; LA64F-NEXT: move $a4, $a1 + ; LA64F-NEXT: sc.w $a4, $a0, 0 + ; 
LA64F-NEXT: beqz $a4, .LBB1_3 + ; LA64F-NEXT: b .LBB1_6 + ; LA64F-NEXT: .LBB1_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB1_1 Depth=1 +-; LA64F-NEXT: dbar 1792 ++; LA64F-NEXT: dbar 0 + ; LA64F-NEXT: .LBB1_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB1_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -137,14 +134,13 @@ define float @float_fsub_acquire(ptr %p) nounwind { + ; LA64D-NEXT: bne $a3, $a2, .LBB1_5 + ; LA64D-NEXT: # %bb.4: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB1_3 Depth=2 +-; LA64D-NEXT: dbar 0 + ; LA64D-NEXT: move $a4, $a1 + ; LA64D-NEXT: sc.w $a4, $a0, 0 + ; LA64D-NEXT: beqz $a4, .LBB1_3 + ; LA64D-NEXT: b .LBB1_6 + ; LA64D-NEXT: .LBB1_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1 +-; LA64D-NEXT: dbar 1792 ++; LA64D-NEXT: dbar 0 + ; LA64D-NEXT: .LBB1_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -178,14 +174,13 @@ define float @float_fmin_acquire(ptr %p) nounwind { + ; LA64F-NEXT: bne $a3, $a2, .LBB2_5 + ; LA64F-NEXT: # %bb.4: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB2_3 Depth=2 +-; LA64F-NEXT: dbar 0 + ; LA64F-NEXT: move $a4, $a1 + ; LA64F-NEXT: sc.w $a4, $a0, 0 + ; LA64F-NEXT: beqz $a4, .LBB2_3 + ; LA64F-NEXT: b .LBB2_6 + ; LA64F-NEXT: .LBB2_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB2_1 Depth=1 +-; LA64F-NEXT: dbar 1792 ++; LA64F-NEXT: dbar 0 + ; LA64F-NEXT: .LBB2_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB2_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -215,14 +210,13 @@ define float @float_fmin_acquire(ptr %p) nounwind { + ; LA64D-NEXT: bne $a3, $a2, .LBB2_5 + ; LA64D-NEXT: # %bb.4: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB2_3 Depth=2 +-; LA64D-NEXT: dbar 0 + ; LA64D-NEXT: move $a4, $a1 + ; LA64D-NEXT: sc.w $a4, $a0, 0 + ; LA64D-NEXT: beqz $a4, .LBB2_3 + ; LA64D-NEXT: b .LBB2_6 + ; LA64D-NEXT: .LBB2_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB2_1 Depth=1 +-; LA64D-NEXT: dbar 1792 ++; LA64D-NEXT: dbar 0 + ; LA64D-NEXT: .LBB2_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB2_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -256,14 +250,13 @@ define float @float_fmax_acquire(ptr %p) nounwind { + ; LA64F-NEXT: bne $a3, $a2, .LBB3_5 + ; LA64F-NEXT: # %bb.4: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB3_3 Depth=2 +-; LA64F-NEXT: dbar 0 + ; LA64F-NEXT: move $a4, $a1 + ; LA64F-NEXT: sc.w $a4, $a0, 0 + ; LA64F-NEXT: beqz $a4, .LBB3_3 + ; LA64F-NEXT: b .LBB3_6 + ; LA64F-NEXT: .LBB3_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB3_1 Depth=1 +-; LA64F-NEXT: dbar 1792 ++; LA64F-NEXT: dbar 0 + ; LA64F-NEXT: .LBB3_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB3_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -293,14 +286,13 @@ define float @float_fmax_acquire(ptr %p) nounwind { + ; LA64D-NEXT: bne $a3, $a2, .LBB3_5 + ; LA64D-NEXT: # %bb.4: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB3_3 Depth=2 +-; LA64D-NEXT: dbar 0 + ; LA64D-NEXT: move $a4, $a1 + ; LA64D-NEXT: sc.w $a4, $a0, 0 + ; LA64D-NEXT: beqz $a4, .LBB3_3 + ; LA64D-NEXT: b .LBB3_6 + ; LA64D-NEXT: .LBB3_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB3_1 Depth=1 +-; LA64D-NEXT: dbar 1792 ++; LA64D-NEXT: dbar 0 + ; LA64D-NEXT: .LBB3_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB3_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll 
b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll +index cd4a9e7fa9c4..26ba77e8d4fd 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll +@@ -17,7 +17,6 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: and $a6, $a4, $a3 + ; LA64-NEXT: move $a5, $a4 +@@ -30,8 +29,6 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: sc.w $a5, $a2, 0 + ; LA64-NEXT: beqz $a5, .LBB0_1 + ; LA64-NEXT: # %bb.4: +-; LA64-NEXT: dbar 1792 +-; LA64-NEXT: # %bb.5: + ; LA64-NEXT: srl.w $a0, $a4, $a0 + ; LA64-NEXT: ret + %1 = atomicrmw umax ptr %a, i8 %b acquire +@@ -52,7 +49,6 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: and $a6, $a4, $a3 + ; LA64-NEXT: move $a5, $a4 +@@ -65,8 +61,6 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sc.w $a5, $a2, 0 + ; LA64-NEXT: beqz $a5, .LBB1_1 + ; LA64-NEXT: # %bb.4: +-; LA64-NEXT: dbar 1792 +-; LA64-NEXT: # %bb.5: + ; LA64-NEXT: srl.w $a0, $a4, $a0 + ; LA64-NEXT: ret + %1 = atomicrmw umax ptr %a, i16 %b acquire +@@ -106,7 +100,6 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: and $a6, $a4, $a3 + ; LA64-NEXT: move $a5, $a4 +@@ -119,8 +112,6 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: sc.w $a5, $a2, 0 + ; LA64-NEXT: beqz $a5, .LBB4_1 + ; LA64-NEXT: # %bb.4: +-; LA64-NEXT: dbar 1792 +-; LA64-NEXT: # %bb.5: + ; LA64-NEXT: srl.w $a0, $a4, $a0 + ; LA64-NEXT: ret + %1 = atomicrmw umin ptr %a, i8 %b acquire +@@ -141,7 +132,6 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: and $a6, $a4, $a3 + ; LA64-NEXT: move $a5, $a4 +@@ -154,8 +144,6 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sc.w $a5, $a2, 0 + ; LA64-NEXT: beqz $a5, .LBB5_1 + ; LA64-NEXT: # %bb.4: +-; LA64-NEXT: dbar 1792 +-; LA64-NEXT: # %bb.5: + ; LA64-NEXT: srl.w $a0, $a4, $a0 + ; LA64-NEXT: ret + %1 = atomicrmw umin ptr %a, i16 %b acquire +@@ -197,7 +185,6 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: andi $a4, $a0, 24 + ; LA64-NEXT: xori $a4, $a4, 56 + ; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a5, $a2, 0 + ; LA64-NEXT: and $a7, $a5, $a3 + ; LA64-NEXT: move $a6, $a5 +@@ -212,8 +199,6 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: sc.w $a6, $a2, 0 + ; LA64-NEXT: beqz $a6, .LBB8_1 + ; LA64-NEXT: # %bb.4: +-; LA64-NEXT: dbar 1792 +-; LA64-NEXT: # %bb.5: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret + %1 = atomicrmw max ptr %a, i8 %b acquire +@@ -237,7 +222,6 @@ define i16 @atomicrmw_max_i16_acquire(ptr 
%a, i16 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a5, $a2, 0 + ; LA64-NEXT: and $a7, $a5, $a4 + ; LA64-NEXT: move $a6, $a5 +@@ -252,8 +236,6 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sc.w $a6, $a2, 0 + ; LA64-NEXT: beqz $a6, .LBB9_1 + ; LA64-NEXT: # %bb.4: +-; LA64-NEXT: dbar 1792 +-; LA64-NEXT: # %bb.5: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret + %1 = atomicrmw max ptr %a, i16 %b acquire +@@ -295,7 +277,6 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: andi $a4, $a0, 24 + ; LA64-NEXT: xori $a4, $a4, 56 + ; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a5, $a2, 0 + ; LA64-NEXT: and $a7, $a5, $a3 + ; LA64-NEXT: move $a6, $a5 +@@ -310,8 +291,6 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: sc.w $a6, $a2, 0 + ; LA64-NEXT: beqz $a6, .LBB12_1 + ; LA64-NEXT: # %bb.4: +-; LA64-NEXT: dbar 1792 +-; LA64-NEXT: # %bb.5: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret + %1 = atomicrmw min ptr %a, i8 %b acquire +@@ -335,7 +314,6 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a5, $a2, 0 + ; LA64-NEXT: and $a7, $a5, $a4 + ; LA64-NEXT: move $a6, $a5 +@@ -350,8 +328,6 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sc.w $a6, $a2, 0 + ; LA64-NEXT: beqz $a6, .LBB13_1 + ; LA64-NEXT: # %bb.4: +-; LA64-NEXT: dbar 1792 +-; LA64-NEXT: # %bb.5: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret + %1 = atomicrmw min ptr %a, i16 %b acquire +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll +index c077d14f728f..626276ba05f7 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll +@@ -13,7 +13,6 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: andi $a1, $a1, 255 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: addi.w $a5, $a1, 0 + ; LA32-NEXT: xor $a5, $a4, $a5 +@@ -37,7 +36,6 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: addi.w $a5, $a1, 0 + ; LA64-NEXT: xor $a5, $a4, $a5 +@@ -64,7 +62,6 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: addi.w $a5, $a1, 0 + ; LA32-NEXT: xor $a5, $a4, $a5 +@@ -89,7 +86,6 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: addi.w $a5, $a1, 0 + ; LA64-NEXT: xor $a5, $a4, $a5 +@@ -108,7 +104,6 @@ define 
i32 @atomicrmw_xchg_i32_acquire(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_xchg_i32_acquire: + ; LA32: # %bb.0: + ; LA32-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: move $a3, $a1 + ; LA32-NEXT: sc.w $a3, $a0, 0 +@@ -157,7 +152,6 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: andi $a1, $a1, 255 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: add.w $a5, $a4, $a1 + ; LA32-NEXT: xor $a5, $a4, $a5 +@@ -181,7 +175,6 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: add.w $a5, $a4, $a1 + ; LA64-NEXT: xor $a5, $a4, $a5 +@@ -208,7 +201,6 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: add.w $a5, $a4, $a1 + ; LA32-NEXT: xor $a5, $a4, $a5 +@@ -233,7 +225,6 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: add.w $a5, $a4, $a1 + ; LA64-NEXT: xor $a5, $a4, $a5 +@@ -252,7 +243,6 @@ define i32 @atomicrmw_add_i32_acquire(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_add_i32_acquire: + ; LA32: # %bb.0: + ; LA32-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: add.w $a3, $a2, $a1 + ; LA32-NEXT: sc.w $a3, $a0, 0 +@@ -301,7 +291,6 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: andi $a1, $a1, 255 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: sub.w $a5, $a4, $a1 + ; LA32-NEXT: xor $a5, $a4, $a5 +@@ -325,7 +314,6 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: sub.w $a5, $a4, $a1 + ; LA64-NEXT: xor $a5, $a4, $a5 +@@ -352,7 +340,6 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: sub.w $a5, $a4, $a1 + ; LA32-NEXT: xor $a5, $a4, $a5 +@@ -377,7 +364,6 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: sub.w $a5, $a4, $a1 + ; LA64-NEXT: xor $a5, $a4, $a5 +@@ -396,7 +382,6 @@ define i32 @atomicrmw_sub_i32_acquire(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_sub_i32_acquire: + ; LA32: # %bb.0: + ; LA32-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 
0 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: sub.w $a3, $a2, $a1 + ; LA32-NEXT: sc.w $a3, $a0, 0 +@@ -447,7 +432,6 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: andi $a1, $a1, 255 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: and $a5, $a4, $a1 + ; LA32-NEXT: nor $a5, $a5, $zero +@@ -472,7 +456,6 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: and $a5, $a4, $a1 + ; LA64-NEXT: nor $a5, $a5, $zero +@@ -500,7 +483,6 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: and $a5, $a4, $a1 + ; LA32-NEXT: nor $a5, $a5, $zero +@@ -526,7 +508,6 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: and $a5, $a4, $a1 + ; LA64-NEXT: nor $a5, $a5, $zero +@@ -546,7 +527,6 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_nand_i32_acquire: + ; LA32: # %bb.0: + ; LA32-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: and $a3, $a2, $a1 + ; LA32-NEXT: nor $a3, $a3, $zero +@@ -559,7 +539,6 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind { + ; LA64-LABEL: atomicrmw_nand_i32_acquire: + ; LA64: # %bb.0: + ; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a2, $a0, 0 + ; LA64-NEXT: and $a3, $a2, $a1 + ; LA64-NEXT: nor $a3, $a3, $zero +@@ -586,7 +565,6 @@ define i64 @atomicrmw_nand_i64_acquire(ptr %a, i64 %b) nounwind { + ; LA64-LABEL: atomicrmw_nand_i64_acquire: + ; LA64: # %bb.0: + ; LA64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.d $a2, $a0, 0 + ; LA64-NEXT: and $a3, $a2, $a1 + ; LA64-NEXT: nor $a3, $a3, $zero +@@ -611,7 +589,6 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: addi.w $a3, $zero, -4 + ; LA32-NEXT: and $a0, $a0, $a3 + ; LA32-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a3, $a0, 0 + ; LA32-NEXT: and $a4, $a3, $a1 + ; LA32-NEXT: sc.w $a4, $a0, 0 +@@ -650,7 +627,6 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: addi.w $a2, $zero, -4 + ; LA32-NEXT: and $a0, $a0, $a2 + ; LA32-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: and $a4, $a2, $a1 + ; LA32-NEXT: sc.w $a4, $a0, 0 +@@ -681,7 +657,6 @@ define i32 @atomicrmw_and_i32_acquire(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_and_i32_acquire: + ; LA32: # %bb.0: + ; LA32-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: and $a3, $a2, $a1 + ; LA32-NEXT: sc.w $a3, $a0, 0 +@@ -728,7 +703,6 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: 
andi $a1, $a1, 255 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a3, $a2, 0 + ; LA32-NEXT: or $a4, $a3, $a1 + ; LA32-NEXT: sc.w $a4, $a2, 0 +@@ -760,7 +734,6 @@ define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a3, $a2, 0 + ; LA32-NEXT: or $a4, $a3, $a1 + ; LA32-NEXT: sc.w $a4, $a2, 0 +@@ -787,7 +760,6 @@ define i32 @atomicrmw_or_i32_acquire(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_or_i32_acquire: + ; LA32: # %bb.0: + ; LA32-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: or $a3, $a2, $a1 + ; LA32-NEXT: sc.w $a3, $a0, 0 +@@ -834,7 +806,6 @@ define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: andi $a1, $a1, 255 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a3, $a2, 0 + ; LA32-NEXT: xor $a4, $a3, $a1 + ; LA32-NEXT: sc.w $a4, $a2, 0 +@@ -866,7 +837,6 @@ define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a3, $a2, 0 + ; LA32-NEXT: xor $a4, $a3, $a1 + ; LA32-NEXT: sc.w $a4, $a2, 0 +@@ -893,7 +863,6 @@ define i32 @atomicrmw_xor_i32_acquire(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_xor_i32_acquire: + ; LA32: # %bb.0: + ; LA32-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: xor $a3, $a2, $a1 + ; LA32-NEXT: sc.w $a3, $a0, 0 +diff --git a/llvm/unittests/Target/LoongArch/InstSizes.cpp b/llvm/unittests/Target/LoongArch/InstSizes.cpp +index 1a5d4369c48b..3180c7237a79 100644 +--- a/llvm/unittests/Target/LoongArch/InstSizes.cpp ++++ b/llvm/unittests/Target/LoongArch/InstSizes.cpp +@@ -121,7 +121,7 @@ TEST(InstSizes, AtomicPseudo) { + " dead early-clobber renamable $r10, dead early-clobber renamable $r11 = PseudoAtomicLoadAdd32 renamable $r7, renamable $r6, renamable $r8\n" + " dead early-clobber renamable $r5, dead early-clobber renamable $r9, dead early-clobber renamable $r10 = PseudoMaskedAtomicLoadUMax32 renamable $r7, renamable $r6, renamable $r8, 4\n" + " early-clobber renamable $r9, dead early-clobber renamable $r10, dead early-clobber renamable $r11 = PseudoMaskedAtomicLoadMax32 killed renamable $r6, killed renamable $r5, killed renamable $r7, killed renamable $r8, 4\n" +- " dead early-clobber renamable $r5, dead early-clobber renamable $r9 = PseudoCmpXchg32 renamable $r7, renamable $r4, renamable $r6\n" ++ " dead early-clobber renamable $r5, dead early-clobber renamable $r9 = PseudoCmpXchg32 renamable $r7, renamable $r4, renamable $r6, 4\n" + " dead early-clobber renamable $r5, dead early-clobber renamable $r9 = PseudoMaskedCmpXchg32 killed renamable $r7, killed renamable $r4, killed renamable $r6, killed renamable $r8, 4\n", + // clang-format on + [](LoongArchInstrInfo &II, MachineFunction &MF) { +-- +2.20.1 + diff --git a/0001-PATCH-clang-Make-funwind-tables-the-default-on-all-a.patch b/0001-PATCH-clang-Make-funwind-tables-the-default-on-all-a.patch new file mode 100644 index 0000000..113f2a7 --- /dev/null +++ 
b/0001-PATCH-clang-Make-funwind-tables-the-default-on-all-a.patch @@ -0,0 +1,27 @@ +From 49f827b09db549de62dcaf8b90b3fcb3e08c0ee5 Mon Sep 17 00:00:00 2001 +From: Serge Guelton +Date: Mon, 6 Mar 2023 12:37:48 +0100 +Subject: [PATCH] Make -funwind-tables the default on all archs + +--- + clang/lib/Driver/ToolChains/Gnu.cpp | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp +index 24fbdcffc07b..8fed46b49515 100644 +--- a/clang/lib/Driver/ToolChains/Gnu.cpp ++++ b/clang/lib/Driver/ToolChains/Gnu.cpp +@@ -2904,6 +2904,10 @@ Generic_GCC::getDefaultUnwindTableLevel(const ArgList &Args) const { + case llvm::Triple::riscv64: + case llvm::Triple::x86: + case llvm::Triple::x86_64: ++ // Enable -funwind-tables on all architectures supported by Fedora: ++ // rhbz#1655546 ++ case llvm::Triple::systemz: ++ case llvm::Triple::arm: + return UnwindTableLevel::Asynchronous; + default: + return UnwindTableLevel::None; +-- +2.39.1 + diff --git a/0001-Workaround-a-bug-in-ORC-on-ppc64le.patch b/0001-Workaround-a-bug-in-ORC-on-ppc64le.patch new file mode 100644 index 0000000..af236a1 --- /dev/null +++ b/0001-Workaround-a-bug-in-ORC-on-ppc64le.patch @@ -0,0 +1,30 @@ +From a2449cee8c995b56f1892502aab3dfad3d6f3ca1 Mon Sep 17 00:00:00 2001 +From: Tulio Magno Quites Machado Filho +Date: Fri, 8 Sep 2023 11:45:34 -0300 +Subject: [PATCH] Workaround a bug in ORC on ppc64le + +The Jit code appears to be returning the wrong printf symbol on ppc64le +after the transition of the default long double to IEEE 128-bit floating +point. +--- + clang/unittests/Interpreter/InterpreterTest.cpp | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/clang/unittests/Interpreter/InterpreterTest.cpp b/clang/unittests/Interpreter/InterpreterTest.cpp +index abb8e6377aab..7b6697ebc6ed 100644 +--- a/clang/unittests/Interpreter/InterpreterTest.cpp ++++ b/clang/unittests/Interpreter/InterpreterTest.cpp +@@ -243,7 +243,9 @@ TEST(IncrementalProcessing, FindMangledNameSymbol) { + EXPECT_FALSE(!Addr); + + // FIXME: Re-enable when we investigate the way we handle dllimports on Win. +-#ifndef _WIN32 ++ // FIXME: The printf symbol returned from the Jit may not be correct on ++ // ppc64le when the default long double is IEEE 128-bit fp. ++#if !defined _WIN32 && !(defined __PPC64__ && defined __LITTLE_ENDIAN__) + EXPECT_EQ((uintptr_t)&printf, Addr->getValue()); + #endif // _WIN32 + } +-- +2.41.0 + diff --git a/0001-clang-shlib-Add-symbol-versioning-to-all-symbols.patch b/0001-clang-shlib-Add-symbol-versioning-to-all-symbols.patch new file mode 100644 index 0000000..aa80db6 --- /dev/null +++ b/0001-clang-shlib-Add-symbol-versioning-to-all-symbols.patch @@ -0,0 +1,39 @@ +From 59673ed97d92df34d6f662da5a51f6e28806b5af Mon Sep 17 00:00:00 2001 +From: Tom Stellard +Date: Thu, 26 Sep 2024 13:53:18 +0000 +Subject: [PATCH] [clang-shlib] Add symbol versioning to all symbols + +We do the same thing for libLLVM.so. This should help avoid issues +when an applications loads two different versions of the library at +the same time. 
+--- + clang/tools/clang-shlib/CMakeLists.txt | 7 +++++++ + clang/tools/clang-shlib/simple_version_script.map.in | 1 + + 2 files changed, 8 insertions(+) + create mode 100644 clang/tools/clang-shlib/simple_version_script.map.in + +diff --git a/clang/tools/clang-shlib/CMakeLists.txt b/clang/tools/clang-shlib/CMakeLists.txt +index 298d3a9d18fe..004ce2897960 100644 +--- a/clang/tools/clang-shlib/CMakeLists.txt ++++ b/clang/tools/clang-shlib/CMakeLists.txt +@@ -61,3 +61,10 @@ if (MINGW OR CYGWIN) + # make sure we export all symbols despite potential dllexports. + target_link_options(clang-cpp PRIVATE LINKER:--export-all-symbols) + endif() ++ ++# Solaris ld does not accept global: *; so there is no way to version *all* global symbols ++if (NOT LLVM_LINKER_IS_SOLARISLD AND NOT MINGW) ++ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/simple_version_script.map.in ++ ${CMAKE_CURRENT_BINARY_DIR}/simple_version_script.map) ++ target_link_options(clang-cpp PRIVATE -Wl,--version-script,${CMAKE_CURRENT_BINARY_DIR}/simple_version_script.map) ++endif() +diff --git a/clang/tools/clang-shlib/simple_version_script.map.in b/clang/tools/clang-shlib/simple_version_script.map.in +new file mode 100644 +index 000000000000..cb2306d1f596 +--- /dev/null ++++ b/clang/tools/clang-shlib/simple_version_script.map.in +@@ -0,0 +1 @@ ++@LLVM_SHLIB_SYMBOL_VERSION@ { global: *; }; +-- +2.46.0 + diff --git a/0001-flang-Remove-the-dependency-on-Bye.patch b/0001-flang-Remove-the-dependency-on-Bye.patch new file mode 100644 index 0000000..7db151f --- /dev/null +++ b/0001-flang-Remove-the-dependency-on-Bye.patch @@ -0,0 +1,47 @@ +From 11af57106d4b6a2db178d932f58bd3285d1eefc1 Mon Sep 17 00:00:00 2001 +From: Tulio Magno Quites Machado Filho +Date: Wed, 22 Feb 2023 18:46:40 -0300 +Subject: [PATCH] [flang] Remove the dependency on Bye + +This plugin is not distributed on Fedora. +--- + flang/test/CMakeLists.txt | 4 ---- + flang/test/Driver/frontend-forwarding.f90 | 2 -- + 2 files changed, 6 deletions(-) + +diff --git a/flang/test/CMakeLists.txt b/flang/test/CMakeLists.txt +index 7d96a72e5f36..73a1f11b4533 100644 +--- a/flang/test/CMakeLists.txt ++++ b/flang/test/CMakeLists.txt +@@ -65,9 +65,6 @@ set(FLANG_TEST_DEPENDS + Fortran_main + FortranDecimal + ) +-if (LLVM_ENABLE_PLUGINS AND NOT WIN32) +- list(APPEND FLANG_TEST_DEPENDS Bye) +-endif() + + if (FLANG_INCLUDE_TESTS) + if (FLANG_GTEST_AVAIL) +diff --git a/flang/test/Driver/frontend-forwarding.f90 b/flang/test/Driver/frontend-forwarding.f90 +index e953c957d2d0..38ad48f2b4bd 100644 +--- a/flang/test/Driver/frontend-forwarding.f90 ++++ b/flang/test/Driver/frontend-forwarding.f90 +@@ -14,7 +14,6 @@ + ! RUN: -fno-signed-zeros \ + ! RUN: -fassociative-math \ + ! RUN: -freciprocal-math \ +-! RUN: -fpass-plugin=Bye%pluginext \ + ! RUN: -fversion-loops-for-stride \ + ! RUN: -flang-experimental-polymorphism \ + ! RUN: -mllvm -print-before-all \ +@@ -35,7 +34,6 @@ + ! CHECK: "-mreassociate" + ! CHECK: "-freciprocal-math" + ! CHECK: "-fconvert=little-endian" +-! CHECK: "-fpass-plugin=Bye + ! CHECK: "-flang-experimental-polymorphism" + ! CHECK: "-fversion-loops-for-stride" + ! 
CHECK: "-mllvm" "-print-before-all" +-- +2.39.1 diff --git a/0001-lld-LoongArch-Support-the-R_LARCH_-ADD-SUB-6-relocat.patch b/0001-lld-LoongArch-Support-the-R_LARCH_-ADD-SUB-6-relocat.patch new file mode 100644 index 0000000..54b3e67 --- /dev/null +++ b/0001-lld-LoongArch-Support-the-R_LARCH_-ADD-SUB-6-relocat.patch @@ -0,0 +1,77 @@ +From 7f14e7c1b116fc865ddebb78e67816bfc5216178 Mon Sep 17 00:00:00 2001 +From: Jinyang He +Date: Wed, 15 Nov 2023 09:57:45 +0800 +Subject: [PATCH 01/14] [lld][LoongArch] Support the R_LARCH_{ADD,SUB}6 + relocation type (#72190) + +The R_LARCH_{ADD,SUB}6 relocation type are usually used by DwarfCFA to +calculate a tiny offset. They appear after binutils 2.41, with GAS +enabling relaxation by default. + +(cherry picked from commit 72accbfd0a1023b3182202276904120524ff9200) +--- + lld/ELF/Arch/LoongArch.cpp | 8 ++++++++ + lld/test/ELF/loongarch-add-sub.s | 6 +++++- + 2 files changed, 13 insertions(+), 1 deletion(-) + +diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp +index 04ddb4682917..d3a538577a59 100644 +--- a/lld/ELF/Arch/LoongArch.cpp ++++ b/lld/ELF/Arch/LoongArch.cpp +@@ -444,10 +444,12 @@ RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s, + case R_LARCH_TLS_LE64_LO20: + case R_LARCH_TLS_LE64_HI12: + return R_TPREL; ++ case R_LARCH_ADD6: + case R_LARCH_ADD8: + case R_LARCH_ADD16: + case R_LARCH_ADD32: + case R_LARCH_ADD64: ++ case R_LARCH_SUB6: + case R_LARCH_SUB8: + case R_LARCH_SUB16: + case R_LARCH_SUB32: +@@ -650,6 +652,9 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel, + write32le(loc, setK12(read32le(loc), extractBits(val, 63, 52))); + return; + ++ case R_LARCH_ADD6: ++ *loc = (*loc & 0xc0) | ((*loc + val) & 0x3f); ++ return; + case R_LARCH_ADD8: + *loc += val; + return; +@@ -662,6 +667,9 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel, + case R_LARCH_ADD64: + write64le(loc, read64le(loc) + val); + return; ++ case R_LARCH_SUB6: ++ *loc = (*loc & 0xc0) | ((*loc - val) & 0x3f); ++ return; + case R_LARCH_SUB8: + *loc -= val; + return; +diff --git a/lld/test/ELF/loongarch-add-sub.s b/lld/test/ELF/loongarch-add-sub.s +index 63a3f7de179e..35f8a053d69c 100644 +--- a/lld/test/ELF/loongarch-add-sub.s ++++ b/lld/test/ELF/loongarch-add-sub.s +@@ -6,7 +6,7 @@ + # RUN: llvm-readelf -x .rodata %t.la64 | FileCheck --check-prefix=CHECK %s + # CHECK: section '.rodata': + # CHECK-NEXT: 0x9876543210 10325476 98badcfe 804602be 79ffffff +-# CHECK-NEXT: 0x9876543220 804602be 804680 ++# CHECK-NEXT: 0x9876543220 804602be 80468097 + + .text + .global _start +@@ -34,3 +34,7 @@ quux: + .byte 0 + .reloc quux, R_LARCH_ADD8, 1b + .reloc quux, R_LARCH_SUB8, 2b ++qux: ++ .byte 0b10000000 ++ .reloc qux, R_LARCH_ADD6, qux ++ .reloc qux, R_LARCH_SUB6, 2b +-- +2.20.1 + diff --git a/0001-openmp-Add-option-to-disable-tsan-tests-111548.patch b/0001-openmp-Add-option-to-disable-tsan-tests-111548.patch new file mode 100644 index 0000000..0e30347 --- /dev/null +++ b/0001-openmp-Add-option-to-disable-tsan-tests-111548.patch @@ -0,0 +1,62 @@ +From b2edeb58b8cb3268acee425cd52b406eb60a8095 Mon Sep 17 00:00:00 2001 +From: Nikita Popov +Date: Wed, 9 Oct 2024 11:29:30 +0200 +Subject: [PATCH] [openmp] Add option to disable tsan tests (#111548) + +This adds a OPENMP_TEST_ENABLE_TSAN option that allows to override +whether tests using tsan will be enabled. The option defaults to the +existing auto-detection. 
+ +The background here is +https://github.com/llvm/llvm-project/issues/111492, where we have some +systems where tsan doesn't work, but we do still want to build it and +run tests that don't use tsan. +--- + openmp/cmake/OpenMPTesting.cmake | 3 +++ + openmp/tools/archer/tests/CMakeLists.txt | 2 +- + openmp/tools/archer/tests/lit.site.cfg.in | 2 +- + 3 files changed, 5 insertions(+), 2 deletions(-) + +diff --git a/openmp/cmake/OpenMPTesting.cmake b/openmp/cmake/OpenMPTesting.cmake +index c67ad8b1cbd9..14cc5c67d84c 100644 +--- a/openmp/cmake/OpenMPTesting.cmake ++++ b/openmp/cmake/OpenMPTesting.cmake +@@ -163,6 +163,9 @@ else() + set(OPENMP_TEST_COMPILER_HAS_OMIT_FRAME_POINTER_FLAGS 1) + endif() + ++set(OPENMP_TEST_ENABLE_TSAN "${OPENMP_TEST_COMPILER_HAS_TSAN_FLAGS}" CACHE BOOL ++ "Whether to enable tests using tsan") ++ + # Function to set compiler features for use in lit. + function(update_test_compiler_features) + set(FEATURES "[") +diff --git a/openmp/tools/archer/tests/CMakeLists.txt b/openmp/tools/archer/tests/CMakeLists.txt +index 5de91148fa4b..412c7d63725e 100644 +--- a/openmp/tools/archer/tests/CMakeLists.txt ++++ b/openmp/tools/archer/tests/CMakeLists.txt +@@ -28,7 +28,7 @@ macro(pythonize_bool var) + endmacro() + + pythonize_bool(LIBARCHER_HAVE_LIBATOMIC) +-pythonize_bool(OPENMP_TEST_COMPILER_HAS_TSAN_FLAGS) ++pythonize_bool(OPENMP_TEST_ENABLE_TSAN) + + set(ARCHER_TSAN_TEST_DEPENDENCE "") + if(TARGET tsan) +diff --git a/openmp/tools/archer/tests/lit.site.cfg.in b/openmp/tools/archer/tests/lit.site.cfg.in +index 55edfde9738e..ddcb7b8bc3a5 100644 +--- a/openmp/tools/archer/tests/lit.site.cfg.in ++++ b/openmp/tools/archer/tests/lit.site.cfg.in +@@ -12,7 +12,7 @@ config.omp_library_dir = "@LIBOMP_LIBRARY_DIR@" + config.omp_header_dir = "@LIBOMP_INCLUDE_DIR@" + config.operating_system = "@CMAKE_SYSTEM_NAME@" + config.has_libatomic = @LIBARCHER_HAVE_LIBATOMIC@ +-config.has_tsan = @OPENMP_TEST_COMPILER_HAS_TSAN_FLAGS@ ++config.has_tsan = @OPENMP_TEST_ENABLE_TSAN@ + + config.test_archer_flags = "@LIBARCHER_TEST_FLAGS@" + config.libarcher_obj_root = "@CMAKE_CURRENT_BINARY_DIR@" +-- +2.46.0 + diff --git a/0001-profile-Use-base-vaddr-for-__llvm_write_binary_ids-n.patch b/0001-profile-Use-base-vaddr-for-__llvm_write_binary_ids-n.patch new file mode 100644 index 0000000..7f0a7cf --- /dev/null +++ b/0001-profile-Use-base-vaddr-for-__llvm_write_binary_ids-n.patch @@ -0,0 +1,86 @@ +From ccc2b792e57d632bc887b226a4e7f0a8189eab8b Mon Sep 17 00:00:00 2001 +From: Josh Stone +Date: Mon, 4 Nov 2024 16:37:49 -0800 +Subject: [PATCH] [profile] Use base+vaddr for `__llvm_write_binary_ids` note + pointers + +This function is always examining its own ELF headers in memory, but it +was trying to use conditions between examining files or memory, and it +wasn't accounting for LOAD offsets at runtime. This is especially bad if +a loaded segment has additional padding that's not in the file offsets. + +Now we do a first scan of the program headers to figure out the runtime +base address based on `PT_PHDR` and/or `PT_DYNAMIC` (else assume zero), +similar to libc's `do_start`. Then each `PT_NOTE` pointer is simply the +base plus the segments's `pt_vaddr`, which includes LOAD offsets. 
+ +Fixes #114605 +--- + .../lib/profile/InstrProfilingPlatformLinux.c | 40 ++++++++----------- + 1 file changed, 16 insertions(+), 24 deletions(-) + +diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c +index e2c06d51e0c6..c365129a0768 100644 +--- a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c ++++ b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c +@@ -194,41 +194,33 @@ static int WriteBinaryIds(ProfDataWriter *Writer, const ElfW(Nhdr) * Note, + */ + COMPILER_RT_VISIBILITY int __llvm_write_binary_ids(ProfDataWriter *Writer) { + extern const ElfW(Ehdr) __ehdr_start __attribute__((visibility("hidden"))); ++ extern ElfW(Dyn) _DYNAMIC[] __attribute__((weak, visibility("hidden"))); ++ + const ElfW(Ehdr) *ElfHeader = &__ehdr_start; + const ElfW(Phdr) *ProgramHeader = + (const ElfW(Phdr) *)((uintptr_t)ElfHeader + ElfHeader->e_phoff); + ++ /* Compute the added base address in case of position-independent code. */ ++ uintptr_t Base = 0; ++ for (uint32_t I = 0; I < ElfHeader->e_phnum; I++) { ++ if (ProgramHeader[I].p_type == PT_PHDR) ++ Base = (uintptr_t)ProgramHeader - ProgramHeader[I].p_vaddr; ++ if (ProgramHeader[I].p_type == PT_DYNAMIC && _DYNAMIC) ++ Base = (uintptr_t)_DYNAMIC - ProgramHeader[I].p_vaddr; ++ } ++ + int TotalBinaryIdsSize = 0; +- uint32_t I; + /* Iterate through entries in the program header. */ +- for (I = 0; I < ElfHeader->e_phnum; I++) { ++ for (uint32_t I = 0; I < ElfHeader->e_phnum; I++) { + /* Look for the notes segment in program header entries. */ + if (ProgramHeader[I].p_type != PT_NOTE) + continue; + + /* There can be multiple notes segment, and examine each of them. */ +- const ElfW(Nhdr) * Note; +- const ElfW(Nhdr) * NotesEnd; +- /* +- * When examining notes in file, use p_offset, which is the offset within +- * the elf file, to find the start of notes. +- */ +- if (ProgramHeader[I].p_memsz == 0 || +- ProgramHeader[I].p_memsz == ProgramHeader[I].p_filesz) { +- Note = (const ElfW(Nhdr) *)((uintptr_t)ElfHeader + +- ProgramHeader[I].p_offset); +- NotesEnd = (const ElfW(Nhdr) *)((const char *)(Note) + +- ProgramHeader[I].p_filesz); +- } else { +- /* +- * When examining notes in memory, use p_vaddr, which is the address of +- * section after loaded to memory, to find the start of notes. +- */ +- Note = +- (const ElfW(Nhdr) *)((uintptr_t)ElfHeader + ProgramHeader[I].p_vaddr); +- NotesEnd = +- (const ElfW(Nhdr) *)((const char *)(Note) + ProgramHeader[I].p_memsz); +- } ++ const ElfW(Nhdr) *Note = ++ (const ElfW(Nhdr) *)(Base + ProgramHeader[I].p_vaddr); ++ const ElfW(Nhdr) *NotesEnd = ++ (const ElfW(Nhdr) *)((const char *)(Note) + ProgramHeader[I].p_memsz); + + int BinaryIdsSize = WriteBinaryIds(Writer, Note, NotesEnd); + if (TotalBinaryIdsSize == -1) +-- +2.47.0 + diff --git a/0001-sanitizer-msan-VarArgHelper-for-loongarch64.patch b/0001-sanitizer-msan-VarArgHelper-for-loongarch64.patch new file mode 100644 index 0000000..f6cf0c8 --- /dev/null +++ b/0001-sanitizer-msan-VarArgHelper-for-loongarch64.patch @@ -0,0 +1,153 @@ +From 857dc000141b237da73a43d59e22672750501559 Mon Sep 17 00:00:00 2001 +From: zhanglimin +Date: Tue, 12 Sep 2023 09:51:16 +0800 +Subject: [PATCH 01/27] [sanitizer][msan] VarArgHelper for loongarch64 + +This patch adds support for variadic argument for loongarch64, +which is based on MIPS64. And `check-msan` all pass. 
+ +Reviewed By: vitalybuka + +Differential Revision: https://reviews.llvm.org/D158587 + +(cherry picked from commit ec42c78cc43ac1e8364e5a0941aa5fc91b813dd3) +--- + compiler-rt/test/msan/signal_stress_test.cpp | 3 - + .../Instrumentation/MemorySanitizer.cpp | 7 ++ + .../LoongArch/vararg-loongarch64.ll | 78 +++++++++++++++++++ + 3 files changed, 85 insertions(+), 3 deletions(-) + create mode 100644 llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll + +diff --git a/compiler-rt/test/msan/signal_stress_test.cpp b/compiler-rt/test/msan/signal_stress_test.cpp +index aade0f1f4051..043393fce6de 100644 +--- a/compiler-rt/test/msan/signal_stress_test.cpp ++++ b/compiler-rt/test/msan/signal_stress_test.cpp +@@ -5,9 +5,6 @@ + // Reported deadly signal due to stack-overflow + // XFAIL: target={{.*netbsd.*}} + +-// VarArg implementation on LoongArch isn't supported yet. +-// UNSUPPORTED: target=loongarch{{.*}} +- + #include + #include + #include +diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +index 83d90049abc3..362fd6e4151f 100644 +--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp ++++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +@@ -4945,6 +4945,7 @@ struct VarArgAMD64Helper : public VarArgHelper { + }; + + /// MIPS64-specific implementation of VarArgHelper. ++/// NOTE: This is also used for LoongArch64. + struct VarArgMIPS64Helper : public VarArgHelper { + Function &F; + MemorySanitizer &MS; +@@ -5836,6 +5837,10 @@ struct VarArgSystemZHelper : public VarArgHelper { + } + }; + ++// Loongarch64 is not a MIPS, but the current vargs calling convention matches ++// the MIPS. ++using VarArgLoongArch64Helper = VarArgMIPS64Helper; ++ + /// A no-op implementation of VarArgHelper. + struct VarArgNoOpHelper : public VarArgHelper { + VarArgNoOpHelper(Function &F, MemorySanitizer &MS, +@@ -5868,6 +5873,8 @@ static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan, + return new VarArgPowerPC64Helper(Func, Msan, Visitor); + else if (TargetTriple.getArch() == Triple::systemz) + return new VarArgSystemZHelper(Func, Msan, Visitor); ++ else if (TargetTriple.isLoongArch64()) ++ return new VarArgLoongArch64Helper(Func, Msan, Visitor); + else + return new VarArgNoOpHelper(Func, Msan, Visitor); + } +diff --git a/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll b/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll +new file mode 100644 +index 000000000000..8a4ab59588ad +--- /dev/null ++++ b/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll +@@ -0,0 +1,78 @@ ++; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck %s ++ ++target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" ++target triple = "loongarch64-unknown-linux-gnu" ++ ++;; First, check allocation of the save area. ++declare void @llvm.lifetime.start.p0(i64, ptr nocapture) #1 ++declare void @llvm.va_start(ptr) #2 ++declare void @llvm.va_end(ptr) #2 ++declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #1 ++define i32 @foo(i32 %guard, ...) 
{ ++; CHECK-LABEL: @foo ++; CHECK: [[TMP1:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls ++; CHECK: [[TMP2:%.*]] = add i64 0, [[TMP1]] ++; CHECK: [[TMP3:%.*]] = alloca {{.*}} [[TMP2]] ++; CHECK: call void @llvm.memset.p0.i64(ptr align 8 [[TMP3]], i8 0, i64 [[TMP2]], i1 false) ++; CHECK: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP2]], i64 800) ++; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP3]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP4]], i1 false) ++; ++ %vl = alloca ptr, align 8 ++ call void @llvm.lifetime.start.p0(i64 32, ptr %vl) ++ call void @llvm.va_start(ptr %vl) ++ call void @llvm.va_end(ptr %vl) ++ call void @llvm.lifetime.end.p0(i64 32, ptr %vl) ++ ret i32 0 ++} ++ ++;; Save the incoming shadow value from the arguments in the __msan_va_arg_tls ++;; array. ++define i32 @bar() { ++; CHECK-LABEL: @bar ++; CHECK: store i32 0, ptr @__msan_va_arg_tls, align 8 ++; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 ++; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 ++; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls ++; ++ %1 = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00) ++ ret i32 %1 ++} ++ ++;; Check multiple fixed arguments. ++declare i32 @foo2(i32 %g1, i32 %g2, ...) ++define i32 @bar2() { ++; CHECK-LABEL: @bar2 ++; CHECK: store i64 0, ptr @__msan_va_arg_tls, align 8 ++; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 ++; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls ++; ++ %1 = call i32 (i32, i32, ...) @foo2(i32 0, i32 1, i64 2, double 3.000000e+00) ++ ret i32 %1 ++} ++ ++;; Test that MSan doesn't generate code overflowing __msan_va_arg_tls when too many arguments are ++;; passed to a variadic function. ++declare i64 @sum(i64 %n, ...) ++define dso_local i64 @many_args() { ++;; If the size of __msan_va_arg_tls changes the second argument of `add` must also be changed. ++; CHECK-LABEL: @many_args ++; CHECK: i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 792) ++; CHECK-NOT: i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 800) ++; ++entry: ++ %ret = call i64 (i64, ...) 
@sum(i64 120, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1 ++ ) ++ ret i64 %ret ++} +-- +2.20.1 + diff --git a/0002-BinaryFormat-LoongArch-Define-psABI-v2.20-relocs-for.patch b/0002-BinaryFormat-LoongArch-Define-psABI-v2.20-relocs-for.patch new file mode 100644 index 0000000..3525093 --- /dev/null +++ b/0002-BinaryFormat-LoongArch-Define-psABI-v2.20-relocs-for.patch @@ -0,0 +1,63 @@ +From 357d6de40b26b7ecce3fc196680604c300de8e38 Mon Sep 17 00:00:00 2001 +From: Lu Weining +Date: Tue, 5 Dec 2023 09:20:48 +0800 +Subject: [PATCH 02/23] [BinaryFormat][LoongArch] Define psABI v2.20 relocs for + R_LARCH_CALL36(#73345) + +R_LARCH_CALL36 was designed for function call on medium code model where +the 2 instructions (pcaddu18i + jirl) must be adjacent. + +(cherry picked from commit c3a9c905fbc486add75e16218fe58a04b7b6c282) +--- + llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def | 6 ++++++ + .../tools/llvm-readobj/ELF/reloc-types-loongarch64.test | 2 ++ + llvm/unittests/Object/ELFTest.cpp | 2 ++ + 3 files changed, 10 insertions(+) + +diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def +index 02bce3c71712..c4393432677b 100644 +--- a/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def ++++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def +@@ -118,3 +118,9 @@ ELF_RELOC(R_LARCH_SUB6, 106) + ELF_RELOC(R_LARCH_ADD_ULEB128, 107) + ELF_RELOC(R_LARCH_SUB_ULEB128, 108) + ELF_RELOC(R_LARCH_64_PCREL, 109) ++ ++// Relocs added in ELF for the LoongArchâ„¢ Architecture v20231102, part of the ++// v2.20 LoongArch ABI specs. 
++// ++// Spec addition: https://github.com/loongson/la-abi-specs/pull/4 ++ELF_RELOC(R_LARCH_CALL36, 110) +diff --git a/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test b/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test +index e32dc893fa79..88ff7fa405ed 100644 +--- a/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test ++++ b/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test +@@ -102,6 +102,7 @@ + # CHECK: Type: R_LARCH_ADD_ULEB128 (107) + # CHECK: Type: R_LARCH_SUB_ULEB128 (108) + # CHECK: Type: R_LARCH_64_PCREL (109) ++# CHECK: Type: R_LARCH_CALL36 (110) + + --- !ELF + FileHeader: +@@ -211,3 +212,4 @@ Sections: + - Type: R_LARCH_ADD_ULEB128 + - Type: R_LARCH_SUB_ULEB128 + - Type: R_LARCH_64_PCREL ++ - Type: R_LARCH_CALL36 +diff --git a/llvm/unittests/Object/ELFTest.cpp b/llvm/unittests/Object/ELFTest.cpp +index 50b1df124a4a..ed851dde4c00 100644 +--- a/llvm/unittests/Object/ELFTest.cpp ++++ b/llvm/unittests/Object/ELFTest.cpp +@@ -251,6 +251,8 @@ TEST(ELFTest, getELFRelocationTypeNameForLoongArch) { + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_SUB_ULEB128)); + EXPECT_EQ("R_LARCH_64_PCREL", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_64_PCREL)); ++ EXPECT_EQ("R_LARCH_CALL36", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_CALL36)); + } + + TEST(ELFTest, getELFRelativeRelocationType) { +-- +2.20.1 + diff --git a/0002-Driver-Support-fsanitize-cfi-icall-on-loongarch64-67.patch b/0002-Driver-Support-fsanitize-cfi-icall-on-loongarch64-67.patch new file mode 100644 index 0000000..e432845 --- /dev/null +++ b/0002-Driver-Support-fsanitize-cfi-icall-on-loongarch64-67.patch @@ -0,0 +1,41 @@ +From fe9da4afce8f001e9e62ff1a84fe274a2f65c063 Mon Sep 17 00:00:00 2001 +From: Ami-zhang <96056515+Ami-zhang@users.noreply.github.com> +Date: Thu, 28 Sep 2023 15:20:27 +0800 +Subject: [PATCH 02/27] [Driver] Support -fsanitize=cfi-icall on loongarch64 + (#67310) + +(cherry picked from commit 55accc82bec48acae769b086ad9a5dc29da77f02) +--- + clang/lib/Driver/ToolChain.cpp | 3 ++- + clang/test/Driver/fsanitize.c | 1 + + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp +index 8dafc3d481c2..138f4135b7a1 100644 +--- a/clang/lib/Driver/ToolChain.cpp ++++ b/clang/lib/Driver/ToolChain.cpp +@@ -1273,7 +1273,8 @@ SanitizerMask ToolChain::getSupportedSanitizers() const { + if (getTriple().getArch() == llvm::Triple::x86 || + getTriple().getArch() == llvm::Triple::x86_64 || + getTriple().getArch() == llvm::Triple::arm || getTriple().isWasm() || +- getTriple().isAArch64() || getTriple().isRISCV()) ++ getTriple().isAArch64() || getTriple().isRISCV() || ++ getTriple().isLoongArch64()) + Res |= SanitizerKind::CFIICall; + if (getTriple().getArch() == llvm::Triple::x86_64 || + getTriple().isAArch64(64) || getTriple().isRISCV()) +diff --git a/clang/test/Driver/fsanitize.c b/clang/test/Driver/fsanitize.c +index 9442f6b91471..4a525d75ea11 100644 +--- a/clang/test/Driver/fsanitize.c ++++ b/clang/test/Driver/fsanitize.c +@@ -600,6 +600,7 @@ + // RUN: %clang --target=aarch64_be -fvisibility=hidden -fsanitize=cfi -flto -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CFI + // RUN: %clang --target=riscv32 -fvisibility=hidden -fsanitize=cfi -flto -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CFI + // RUN: %clang --target=riscv64 -fvisibility=hidden -fsanitize=cfi -flto -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CFI ++// RUN: %clang --target=loongarch64 -fvisibility=hidden -fsanitize=cfi 
-flto -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CFI + // CHECK-CFI: -emit-llvm-bc{{.*}}-fsanitize=cfi-derived-cast,cfi-icall,cfi-mfcall,cfi-unrelated-cast,cfi-nvcall,cfi-vcall + // CHECK-CFI-NOMFCALL: -emit-llvm-bc{{.*}}-fsanitize=cfi-derived-cast,cfi-icall,cfi-unrelated-cast,cfi-nvcall,cfi-vcall + // CHECK-CFI-DCAST: -emit-llvm-bc{{.*}}-fsanitize=cfi-derived-cast +-- +2.20.1 + diff --git a/0002-LoongArch-Add-LSX-intrinsic-support.patch b/0002-LoongArch-Add-LSX-intrinsic-support.patch new file mode 100644 index 0000000..8d22bd4 --- /dev/null +++ b/0002-LoongArch-Add-LSX-intrinsic-support.patch @@ -0,0 +1,2725 @@ +From b1101237c46337236b43f9dbae88b03bf0a526f8 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Sat, 19 Aug 2023 15:58:38 +0800 +Subject: [PATCH 02/42] [LoongArch] Add LSX intrinsic support + +For handling intrinsics, our approach is not simply to match them +one-to-one with instructions. Instead, we lower some intrinsics +to common nodes and then perform matching. The advantage of this +approach is that it allows us to fully utilize the passes available +at the common layer for optimizing purposes. + +We perform error checks on the immediate operand of all intrinsics, +rather than waiting until the end to throw exceptions. + +Reviewed By: SixWeining + +Differential Revision: https://reviews.llvm.org/D155829 + +(cherry picked from commit 53141b2fcfa20616970833e6513537d211116c05) + +--- + llvm/include/llvm/IR/IntrinsicsLoongArch.td | 524 ++++++++++ + .../LoongArch/LoongArchISelDAGToDAG.cpp | 100 +- + .../Target/LoongArch/LoongArchISelDAGToDAG.h | 8 + + .../LoongArch/LoongArchISelLowering.cpp | 902 +++++++++++++++++- + .../Target/LoongArch/LoongArchISelLowering.h | 14 + + .../Target/LoongArch/LoongArchInstrInfo.cpp | 12 + + .../Target/LoongArch/LoongArchInstrInfo.td | 6 +- + .../Target/LoongArch/LoongArchLSXInstrInfo.td | 816 ++++++++++++++++ + 8 files changed, 2359 insertions(+), 23 deletions(-) + +diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td +index 4219b2f55346..d39d8261ebe3 100644 +--- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td ++++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td +@@ -123,3 +123,527 @@ def int_loongarch_lddir_d : BaseInt<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + def int_loongarch_ldpte_d : BaseInt<[], [llvm_i64_ty, llvm_i64_ty], + [ImmArg>]>; + } // TargetPrefix = "loongarch" ++ ++/// Vector intrinsic ++ ++class VecInt ret_types, list param_types, ++ list intr_properties = []> ++ : Intrinsic, ++ ClangBuiltin; ++ ++//===----------------------------------------------------------------------===// ++// LSX ++ ++let TargetPrefix = "loongarch" in { ++ ++foreach inst = ["vadd_b", "vsub_b", ++ "vsadd_b", "vsadd_bu", "vssub_b", "vssub_bu", ++ "vavg_b", "vavg_bu", "vavgr_b", "vavgr_bu", ++ "vabsd_b", "vabsd_bu", "vadda_b", ++ "vmax_b", "vmax_bu", "vmin_b", "vmin_bu", ++ "vmul_b", "vmuh_b", "vmuh_bu", ++ "vdiv_b", "vdiv_bu", "vmod_b", "vmod_bu", "vsigncov_b", ++ "vand_v", "vor_v", "vxor_v", "vnor_v", "vandn_v", "vorn_v", ++ "vsll_b", "vsrl_b", "vsra_b", "vrotr_b", "vsrlr_b", "vsrar_b", ++ "vbitclr_b", "vbitset_b", "vbitrev_b", ++ "vseq_b", "vsle_b", "vsle_bu", "vslt_b", "vslt_bu", ++ "vpackev_b", "vpackod_b", "vpickev_b", "vpickod_b", ++ "vilvl_b", "vilvh_b"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v16i8_ty], ++ [llvm_v16i8_ty, llvm_v16i8_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vadd_h", "vsub_h", ++ "vsadd_h", "vsadd_hu", "vssub_h", "vssub_hu", ++ "vavg_h", "vavg_hu", "vavgr_h", "vavgr_hu", ++ 
"vabsd_h", "vabsd_hu", "vadda_h", ++ "vmax_h", "vmax_hu", "vmin_h", "vmin_hu", ++ "vmul_h", "vmuh_h", "vmuh_hu", ++ "vdiv_h", "vdiv_hu", "vmod_h", "vmod_hu", "vsigncov_h", ++ "vsll_h", "vsrl_h", "vsra_h", "vrotr_h", "vsrlr_h", "vsrar_h", ++ "vbitclr_h", "vbitset_h", "vbitrev_h", ++ "vseq_h", "vsle_h", "vsle_hu", "vslt_h", "vslt_hu", ++ "vpackev_h", "vpackod_h", "vpickev_h", "vpickod_h", ++ "vilvl_h", "vilvh_h"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], ++ [llvm_v8i16_ty, llvm_v8i16_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vadd_w", "vsub_w", ++ "vsadd_w", "vsadd_wu", "vssub_w", "vssub_wu", ++ "vavg_w", "vavg_wu", "vavgr_w", "vavgr_wu", ++ "vabsd_w", "vabsd_wu", "vadda_w", ++ "vmax_w", "vmax_wu", "vmin_w", "vmin_wu", ++ "vmul_w", "vmuh_w", "vmuh_wu", ++ "vdiv_w", "vdiv_wu", "vmod_w", "vmod_wu", "vsigncov_w", ++ "vsll_w", "vsrl_w", "vsra_w", "vrotr_w", "vsrlr_w", "vsrar_w", ++ "vbitclr_w", "vbitset_w", "vbitrev_w", ++ "vseq_w", "vsle_w", "vsle_wu", "vslt_w", "vslt_wu", ++ "vpackev_w", "vpackod_w", "vpickev_w", "vpickod_w", ++ "vilvl_w", "vilvh_w"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], ++ [llvm_v4i32_ty, llvm_v4i32_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vadd_d", "vadd_q", "vsub_d", "vsub_q", ++ "vsadd_d", "vsadd_du", "vssub_d", "vssub_du", ++ "vhaddw_q_d", "vhaddw_qu_du", "vhsubw_q_d", "vhsubw_qu_du", ++ "vaddwev_q_d", "vaddwod_q_d", "vsubwev_q_d", "vsubwod_q_d", ++ "vaddwev_q_du", "vaddwod_q_du", "vsubwev_q_du", "vsubwod_q_du", ++ "vaddwev_q_du_d", "vaddwod_q_du_d", ++ "vavg_d", "vavg_du", "vavgr_d", "vavgr_du", ++ "vabsd_d", "vabsd_du", "vadda_d", ++ "vmax_d", "vmax_du", "vmin_d", "vmin_du", ++ "vmul_d", "vmuh_d", "vmuh_du", ++ "vmulwev_q_d", "vmulwod_q_d", "vmulwev_q_du", "vmulwod_q_du", ++ "vmulwev_q_du_d", "vmulwod_q_du_d", ++ "vdiv_d", "vdiv_du", "vmod_d", "vmod_du", "vsigncov_d", ++ "vsll_d", "vsrl_d", "vsra_d", "vrotr_d", "vsrlr_d", "vsrar_d", ++ "vbitclr_d", "vbitset_d", "vbitrev_d", ++ "vseq_d", "vsle_d", "vsle_du", "vslt_d", "vslt_du", ++ "vpackev_d", "vpackod_d", "vpickev_d", "vpickod_d", ++ "vilvl_d", "vilvh_d"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], ++ [llvm_v2i64_ty, llvm_v2i64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vaddi_bu", "vsubi_bu", ++ "vmaxi_b", "vmaxi_bu", "vmini_b", "vmini_bu", ++ "vsat_b", "vsat_bu", ++ "vandi_b", "vori_b", "vxori_b", "vnori_b", ++ "vslli_b", "vsrli_b", "vsrai_b", "vrotri_b", ++ "vsrlri_b", "vsrari_b", ++ "vbitclri_b", "vbitseti_b", "vbitrevi_b", ++ "vseqi_b", "vslei_b", "vslei_bu", "vslti_b", "vslti_bu", ++ "vreplvei_b", "vbsll_v", "vbsrl_v", "vshuf4i_b"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v16i8_ty], ++ [llvm_v16i8_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["vaddi_hu", "vsubi_hu", ++ "vmaxi_h", "vmaxi_hu", "vmini_h", "vmini_hu", ++ "vsat_h", "vsat_hu", ++ "vslli_h", "vsrli_h", "vsrai_h", "vrotri_h", ++ "vsrlri_h", "vsrari_h", ++ "vbitclri_h", "vbitseti_h", "vbitrevi_h", ++ "vseqi_h", "vslei_h", "vslei_hu", "vslti_h", "vslti_hu", ++ "vreplvei_h", "vshuf4i_h"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], ++ [llvm_v8i16_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["vaddi_wu", "vsubi_wu", ++ "vmaxi_w", "vmaxi_wu", "vmini_w", "vmini_wu", ++ "vsat_w", "vsat_wu", ++ "vslli_w", "vsrli_w", "vsrai_w", "vrotri_w", ++ "vsrlri_w", "vsrari_w", ++ "vbitclri_w", "vbitseti_w", "vbitrevi_w", ++ "vseqi_w", "vslei_w", "vslei_wu", "vslti_w", "vslti_wu", ++ "vreplvei_w", "vshuf4i_w"] in ++ def int_loongarch_lsx_#inst : 
VecInt<[llvm_v4i32_ty], ++ [llvm_v4i32_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["vaddi_du", "vsubi_du", ++ "vmaxi_d", "vmaxi_du", "vmini_d", "vmini_du", ++ "vsat_d", "vsat_du", ++ "vslli_d", "vsrli_d", "vsrai_d", "vrotri_d", ++ "vsrlri_d", "vsrari_d", ++ "vbitclri_d", "vbitseti_d", "vbitrevi_d", ++ "vseqi_d", "vslei_d", "vslei_du", "vslti_d", "vslti_du", ++ "vreplvei_d"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], ++ [llvm_v2i64_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++ ++foreach inst = ["vhaddw_h_b", "vhaddw_hu_bu", "vhsubw_h_b", "vhsubw_hu_bu", ++ "vaddwev_h_b", "vaddwod_h_b", "vsubwev_h_b", "vsubwod_h_b", ++ "vaddwev_h_bu", "vaddwod_h_bu", "vsubwev_h_bu", "vsubwod_h_bu", ++ "vaddwev_h_bu_b", "vaddwod_h_bu_b", ++ "vmulwev_h_b", "vmulwod_h_b", "vmulwev_h_bu", "vmulwod_h_bu", ++ "vmulwev_h_bu_b", "vmulwod_h_bu_b"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], ++ [llvm_v16i8_ty, llvm_v16i8_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vhaddw_w_h", "vhaddw_wu_hu", "vhsubw_w_h", "vhsubw_wu_hu", ++ "vaddwev_w_h", "vaddwod_w_h", "vsubwev_w_h", "vsubwod_w_h", ++ "vaddwev_w_hu", "vaddwod_w_hu", "vsubwev_w_hu", "vsubwod_w_hu", ++ "vaddwev_w_hu_h", "vaddwod_w_hu_h", ++ "vmulwev_w_h", "vmulwod_w_h", "vmulwev_w_hu", "vmulwod_w_hu", ++ "vmulwev_w_hu_h", "vmulwod_w_hu_h"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], ++ [llvm_v8i16_ty, llvm_v8i16_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vhaddw_d_w", "vhaddw_du_wu", "vhsubw_d_w", "vhsubw_du_wu", ++ "vaddwev_d_w", "vaddwod_d_w", "vsubwev_d_w", "vsubwod_d_w", ++ "vaddwev_d_wu", "vaddwod_d_wu", "vsubwev_d_wu", "vsubwod_d_wu", ++ "vaddwev_d_wu_w", "vaddwod_d_wu_w", ++ "vmulwev_d_w", "vmulwod_d_w", "vmulwev_d_wu", "vmulwod_d_wu", ++ "vmulwev_d_wu_w", "vmulwod_d_wu_w"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], ++ [llvm_v4i32_ty, llvm_v4i32_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vsrln_b_h", "vsran_b_h", "vsrlrn_b_h", "vsrarn_b_h", ++ "vssrln_b_h", "vssran_b_h", "vssrln_bu_h", "vssran_bu_h", ++ "vssrlrn_b_h", "vssrarn_b_h", "vssrlrn_bu_h", "vssrarn_bu_h"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v16i8_ty], ++ [llvm_v8i16_ty, llvm_v8i16_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vsrln_h_w", "vsran_h_w", "vsrlrn_h_w", "vsrarn_h_w", ++ "vssrln_h_w", "vssran_h_w", "vssrln_hu_w", "vssran_hu_w", ++ "vssrlrn_h_w", "vssrarn_h_w", "vssrlrn_hu_w", "vssrarn_hu_w"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], ++ [llvm_v4i32_ty, llvm_v4i32_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vsrln_w_d", "vsran_w_d", "vsrlrn_w_d", "vsrarn_w_d", ++ "vssrln_w_d", "vssran_w_d", "vssrln_wu_d", "vssran_wu_d", ++ "vssrlrn_w_d", "vssrarn_w_d", "vssrlrn_wu_d", "vssrarn_wu_d"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], ++ [llvm_v2i64_ty, llvm_v2i64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vmadd_b", "vmsub_b", "vfrstp_b", "vbitsel_v", "vshuf_b"] in ++ def int_loongarch_lsx_#inst ++ : VecInt<[llvm_v16i8_ty], ++ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], ++ [IntrNoMem]>; ++foreach inst = ["vmadd_h", "vmsub_h", "vfrstp_h", "vshuf_h"] in ++ def int_loongarch_lsx_#inst ++ : VecInt<[llvm_v8i16_ty], ++ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], ++ [IntrNoMem]>; ++foreach inst = ["vmadd_w", "vmsub_w", "vshuf_w"] in ++ def int_loongarch_lsx_#inst ++ : VecInt<[llvm_v4i32_ty], ++ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], ++ [IntrNoMem]>; ++foreach inst = ["vmadd_d", "vmsub_d", "vshuf_d"] in ++ def int_loongarch_lsx_#inst ++ : VecInt<[llvm_v2i64_ty], ++ 
[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vsrlni_b_h", "vsrani_b_h", "vsrlrni_b_h", "vsrarni_b_h", ++ "vssrlni_b_h", "vssrani_b_h", "vssrlni_bu_h", "vssrani_bu_h", ++ "vssrlrni_b_h", "vssrarni_b_h", "vssrlrni_bu_h", "vssrarni_bu_h", ++ "vfrstpi_b", "vbitseli_b", "vextrins_b"] in ++ def int_loongarch_lsx_#inst ++ : VecInt<[llvm_v16i8_ty], ++ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["vsrlni_h_w", "vsrani_h_w", "vsrlrni_h_w", "vsrarni_h_w", ++ "vssrlni_h_w", "vssrani_h_w", "vssrlni_hu_w", "vssrani_hu_w", ++ "vssrlrni_h_w", "vssrarni_h_w", "vssrlrni_hu_w", "vssrarni_hu_w", ++ "vfrstpi_h", "vextrins_h"] in ++ def int_loongarch_lsx_#inst ++ : VecInt<[llvm_v8i16_ty], ++ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["vsrlni_w_d", "vsrani_w_d", "vsrlrni_w_d", "vsrarni_w_d", ++ "vssrlni_w_d", "vssrani_w_d", "vssrlni_wu_d", "vssrani_wu_d", ++ "vssrlrni_w_d", "vssrarni_w_d", "vssrlrni_wu_d", "vssrarni_wu_d", ++ "vpermi_w", "vextrins_w"] in ++ def int_loongarch_lsx_#inst ++ : VecInt<[llvm_v4i32_ty], ++ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["vsrlni_d_q", "vsrani_d_q", "vsrlrni_d_q", "vsrarni_d_q", ++ "vssrlni_d_q", "vssrani_d_q", "vssrlni_du_q", "vssrani_du_q", ++ "vssrlrni_d_q", "vssrarni_d_q", "vssrlrni_du_q", "vssrarni_du_q", ++ "vshuf4i_d", "vextrins_d"] in ++ def int_loongarch_lsx_#inst ++ : VecInt<[llvm_v2i64_ty], ++ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++ ++foreach inst = ["vmaddwev_h_b", "vmaddwod_h_b", "vmaddwev_h_bu", ++ "vmaddwod_h_bu", "vmaddwev_h_bu_b", "vmaddwod_h_bu_b"] in ++ def int_loongarch_lsx_#inst ++ : VecInt<[llvm_v8i16_ty], ++ [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], ++ [IntrNoMem]>; ++foreach inst = ["vmaddwev_w_h", "vmaddwod_w_h", "vmaddwev_w_hu", ++ "vmaddwod_w_hu", "vmaddwev_w_hu_h", "vmaddwod_w_hu_h"] in ++ def int_loongarch_lsx_#inst ++ : VecInt<[llvm_v4i32_ty], ++ [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], ++ [IntrNoMem]>; ++foreach inst = ["vmaddwev_d_w", "vmaddwod_d_w", "vmaddwev_d_wu", ++ "vmaddwod_d_wu", "vmaddwev_d_wu_w", "vmaddwod_d_wu_w"] in ++ def int_loongarch_lsx_#inst ++ : VecInt<[llvm_v2i64_ty], ++ [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], ++ [IntrNoMem]>; ++foreach inst = ["vmaddwev_q_d", "vmaddwod_q_d", "vmaddwev_q_du", ++ "vmaddwod_q_du", "vmaddwev_q_du_d", "vmaddwod_q_du_d"] in ++ def int_loongarch_lsx_#inst ++ : VecInt<[llvm_v2i64_ty], ++ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vsllwil_h_b", "vsllwil_hu_bu"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], ++ [llvm_v16i8_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["vsllwil_w_h", "vsllwil_wu_hu"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], ++ [llvm_v8i16_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["vsllwil_d_w", "vsllwil_du_wu"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], ++ [llvm_v4i32_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++ ++foreach inst = ["vneg_b", "vmskltz_b", "vmskgez_b", "vmsknz_b", ++ "vclo_b", "vclz_b", "vpcnt_b"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v16i8_ty], [llvm_v16i8_ty], ++ [IntrNoMem]>; ++foreach inst = ["vneg_h", "vmskltz_h", "vclo_h", "vclz_h", "vpcnt_h"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], [llvm_v8i16_ty], ++ [IntrNoMem]>; ++foreach inst = ["vneg_w", "vmskltz_w", "vclo_w", "vclz_w", 
"vpcnt_w"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], [llvm_v4i32_ty], ++ [IntrNoMem]>; ++foreach inst = ["vneg_d", "vexth_q_d", "vexth_qu_du", "vmskltz_d", ++ "vextl_q_d", "vextl_qu_du", "vclo_d", "vclz_d", "vpcnt_d"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], [llvm_v2i64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vexth_h_b", "vexth_hu_bu"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], [llvm_v16i8_ty], ++ [IntrNoMem]>; ++foreach inst = ["vexth_w_h", "vexth_wu_hu"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], [llvm_v8i16_ty], ++ [IntrNoMem]>; ++foreach inst = ["vexth_d_w", "vexth_du_wu"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], [llvm_v4i32_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lsx_vldi : VecInt<[llvm_v2i64_ty], [llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++def int_loongarch_lsx_vrepli_b : VecInt<[llvm_v16i8_ty], [llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++def int_loongarch_lsx_vrepli_h : VecInt<[llvm_v8i16_ty], [llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++def int_loongarch_lsx_vrepli_w : VecInt<[llvm_v4i32_ty], [llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++def int_loongarch_lsx_vrepli_d : VecInt<[llvm_v2i64_ty], [llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++ ++def int_loongarch_lsx_vreplgr2vr_b : VecInt<[llvm_v16i8_ty], [llvm_i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vreplgr2vr_h : VecInt<[llvm_v8i16_ty], [llvm_i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vreplgr2vr_w : VecInt<[llvm_v4i32_ty], [llvm_i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vreplgr2vr_d : VecInt<[llvm_v2i64_ty], [llvm_i64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lsx_vinsgr2vr_b ++ : VecInt<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++def int_loongarch_lsx_vinsgr2vr_h ++ : VecInt<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++def int_loongarch_lsx_vinsgr2vr_w ++ : VecInt<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++def int_loongarch_lsx_vinsgr2vr_d ++ : VecInt<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i64_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++ ++def int_loongarch_lsx_vreplve_b ++ : VecInt<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vreplve_h ++ : VecInt<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vreplve_w ++ : VecInt<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vreplve_d ++ : VecInt<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++foreach inst = ["vpickve2gr_b", "vpickve2gr_bu" ] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_i32_ty], ++ [llvm_v16i8_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["vpickve2gr_h", "vpickve2gr_hu" ] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_i32_ty], ++ [llvm_v8i16_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["vpickve2gr_w", "vpickve2gr_wu" ] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_i32_ty], ++ [llvm_v4i32_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["vpickve2gr_d", "vpickve2gr_du" ] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_i64_ty], ++ [llvm_v2i64_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++ ++def int_loongarch_lsx_bz_b : VecInt<[llvm_i32_ty], [llvm_v16i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_bz_h : VecInt<[llvm_i32_ty], [llvm_v8i16_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_bz_w : VecInt<[llvm_i32_ty], [llvm_v4i32_ty], ++ [IntrNoMem]>; ++def 
int_loongarch_lsx_bz_d : VecInt<[llvm_i32_ty], [llvm_v2i64_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_bz_v : VecInt<[llvm_i32_ty], [llvm_v16i8_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lsx_bnz_v : VecInt<[llvm_i32_ty], [llvm_v16i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_bnz_b : VecInt<[llvm_i32_ty], [llvm_v16i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_bnz_h : VecInt<[llvm_i32_ty], [llvm_v8i16_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_bnz_w : VecInt<[llvm_i32_ty], [llvm_v4i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_bnz_d : VecInt<[llvm_i32_ty], [llvm_v2i64_ty], ++ [IntrNoMem]>; ++ ++// LSX Float ++ ++foreach inst = ["vfadd_s", "vfsub_s", "vfmul_s", "vfdiv_s", ++ "vfmax_s", "vfmin_s", "vfmaxa_s", "vfmina_s"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], ++ [llvm_v4f32_ty, llvm_v4f32_ty], ++ [IntrNoMem]>; ++foreach inst = ["vfadd_d", "vfsub_d", "vfmul_d", "vfdiv_d", ++ "vfmax_d", "vfmin_d", "vfmaxa_d", "vfmina_d"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v2f64_ty], ++ [llvm_v2f64_ty, llvm_v2f64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vfmadd_s", "vfmsub_s", "vfnmadd_s", "vfnmsub_s"] in ++ def int_loongarch_lsx_#inst ++ : VecInt<[llvm_v4f32_ty], ++ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], ++ [IntrNoMem]>; ++foreach inst = ["vfmadd_d", "vfmsub_d", "vfnmadd_d", "vfnmsub_d"] in ++ def int_loongarch_lsx_#inst ++ : VecInt<[llvm_v2f64_ty], ++ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vflogb_s", "vfsqrt_s", "vfrecip_s", "vfrsqrt_s", "vfrint_s", ++ "vfrintrne_s", "vfrintrz_s", "vfrintrp_s", "vfrintrm_s"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], [llvm_v4f32_ty], ++ [IntrNoMem]>; ++foreach inst = ["vflogb_d", "vfsqrt_d", "vfrecip_d", "vfrsqrt_d", "vfrint_d", ++ "vfrintrne_d", "vfrintrz_d", "vfrintrp_d", "vfrintrm_d"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v2f64_ty], [llvm_v2f64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vfcvtl_s_h", "vfcvth_s_h"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], [llvm_v8i16_ty], ++ [IntrNoMem]>; ++foreach inst = ["vfcvtl_d_s", "vfcvth_d_s"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v2f64_ty], [llvm_v4f32_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vftintrne_w_s", "vftintrz_w_s", "vftintrp_w_s", "vftintrm_w_s", ++ "vftint_w_s", "vftintrz_wu_s", "vftint_wu_s", "vfclass_s"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], [llvm_v4f32_ty], ++ [IntrNoMem]>; ++foreach inst = ["vftintrne_l_d", "vftintrz_l_d", "vftintrp_l_d", "vftintrm_l_d", ++ "vftint_l_d", "vftintrz_lu_d", "vftint_lu_d", "vfclass_d"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], [llvm_v2f64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vftintrnel_l_s", "vftintrneh_l_s", "vftintrzl_l_s", ++ "vftintrzh_l_s", "vftintrpl_l_s", "vftintrph_l_s", ++ "vftintrml_l_s", "vftintrmh_l_s", "vftintl_l_s", ++ "vftinth_l_s"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], [llvm_v4f32_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vffint_s_w", "vffint_s_wu"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], [llvm_v4i32_ty], ++ [IntrNoMem]>; ++foreach inst = ["vffint_d_l", "vffint_d_lu"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v2f64_ty], [llvm_v2i64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vffintl_d_w", "vffinth_d_w"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v2f64_ty], [llvm_v4i32_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vffint_s_l"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], ++ [llvm_v2i64_ty, 
llvm_v2i64_ty],
++                                       [IntrNoMem]>;
++foreach inst = ["vftintrne_w_d", "vftintrz_w_d", "vftintrp_w_d", "vftintrm_w_d",
++                "vftint_w_d"] in
++  def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty],
++                                       [llvm_v2f64_ty, llvm_v2f64_ty],
++                                       [IntrNoMem]>;
++
++foreach inst = ["vfcvt_h_s"] in
++  def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty],
++                                       [llvm_v4f32_ty, llvm_v4f32_ty],
++                                       [IntrNoMem]>;
++foreach inst = ["vfcvt_s_d"] in
++  def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty],
++                                       [llvm_v2f64_ty, llvm_v2f64_ty],
++                                       [IntrNoMem]>;
++
++foreach inst = ["vfcmp_caf_s", "vfcmp_cun_s", "vfcmp_ceq_s", "vfcmp_cueq_s",
++                "vfcmp_clt_s", "vfcmp_cult_s", "vfcmp_cle_s", "vfcmp_cule_s",
++                "vfcmp_cne_s", "vfcmp_cor_s", "vfcmp_cune_s",
++                "vfcmp_saf_s", "vfcmp_sun_s", "vfcmp_seq_s", "vfcmp_sueq_s",
++                "vfcmp_slt_s", "vfcmp_sult_s", "vfcmp_sle_s", "vfcmp_sule_s",
++                "vfcmp_sne_s", "vfcmp_sor_s", "vfcmp_sune_s"] in
++  def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty],
++                                       [llvm_v4f32_ty, llvm_v4f32_ty],
++                                       [IntrNoMem]>;
++foreach inst = ["vfcmp_caf_d", "vfcmp_cun_d", "vfcmp_ceq_d", "vfcmp_cueq_d",
++                "vfcmp_clt_d", "vfcmp_cult_d", "vfcmp_cle_d", "vfcmp_cule_d",
++                "vfcmp_cne_d", "vfcmp_cor_d", "vfcmp_cune_d",
++                "vfcmp_saf_d", "vfcmp_sun_d", "vfcmp_seq_d", "vfcmp_sueq_d",
++                "vfcmp_slt_d", "vfcmp_sult_d", "vfcmp_sle_d", "vfcmp_sule_d",
++                "vfcmp_sne_d", "vfcmp_sor_d", "vfcmp_sune_d"] in
++  def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty],
++                                       [llvm_v2f64_ty, llvm_v2f64_ty],
++                                       [IntrNoMem]>;
++
++// LSX load/store
++def int_loongarch_lsx_vld
++    : VecInt<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty],
++             [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
++def int_loongarch_lsx_vldx
++    : VecInt<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i64_ty],
++             [IntrReadMem, IntrArgMemOnly]>;
++def int_loongarch_lsx_vldrepl_b
++    : VecInt<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty],
++             [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
++def int_loongarch_lsx_vldrepl_h
++    : VecInt<[llvm_v8i16_ty], [llvm_ptr_ty, llvm_i32_ty],
++             [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
++def int_loongarch_lsx_vldrepl_w
++    : VecInt<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty],
++             [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
++def int_loongarch_lsx_vldrepl_d
++    : VecInt<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty],
++             [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
++
++def int_loongarch_lsx_vst
++    : VecInt<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty],
++             [IntrWriteMem, IntrArgMemOnly, ImmArg<ArgIndex<2>>]>;
++def int_loongarch_lsx_vstx
++    : VecInt<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i64_ty],
++             [IntrWriteMem, IntrArgMemOnly]>;
++def int_loongarch_lsx_vstelm_b
++    : VecInt<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
++             [IntrWriteMem, IntrArgMemOnly, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
++def int_loongarch_lsx_vstelm_h
++    : VecInt<[], [llvm_v8i16_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
++             [IntrWriteMem, IntrArgMemOnly, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
++def int_loongarch_lsx_vstelm_w
++    : VecInt<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
++             [IntrWriteMem, IntrArgMemOnly, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
++def int_loongarch_lsx_vstelm_d
++    : VecInt<[], [llvm_v2i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
++             [IntrWriteMem, IntrArgMemOnly, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
++
++} // TargetPrefix = "loongarch"
+diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
+index ae7167cb5ce7..f55184019988 100644
+--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
++++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
+@@ -15,6 +15,7 @@
#include "MCTargetDesc/LoongArchMCTargetDesc.h" + #include "MCTargetDesc/LoongArchMatInt.h" + #include "llvm/Support/KnownBits.h" ++#include "llvm/Support/raw_ostream.h" + + using namespace llvm; + +@@ -75,7 +76,14 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) { + ReplaceNode(Node, CurDAG->getMachineNode(ADDIOp, DL, VT, TFI, Imm)); + return; + } +- // TODO: Add selection nodes needed later. ++ case ISD::BITCAST: { ++ if (VT.is128BitVector() || VT.is512BitVector()) { ++ ReplaceUses(SDValue(Node, 0), Node->getOperand(0)); ++ CurDAG->RemoveDeadNode(Node); ++ return; ++ } ++ break; ++ } + } + + // Select the default instruction. +@@ -262,6 +270,96 @@ bool LoongArchDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) { + return false; + } + ++bool LoongArchDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm, ++ unsigned MinSizeInBits) const { ++ if (!Subtarget->hasExtLSX()) ++ return false; ++ ++ BuildVectorSDNode *Node = dyn_cast(N); ++ ++ if (!Node) ++ return false; ++ ++ APInt SplatValue, SplatUndef; ++ unsigned SplatBitSize; ++ bool HasAnyUndefs; ++ ++ if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, ++ MinSizeInBits, /*IsBigEndian=*/false)) ++ return false; ++ ++ Imm = SplatValue; ++ ++ return true; ++} ++ ++template ++bool LoongArchDAGToDAGISel::selectVSplatImm(SDValue N, SDValue &SplatVal) { ++ APInt ImmValue; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0); ++ ++ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && ++ ImmValue.getBitWidth() == EltTy.getSizeInBits()) { ++ if (IsSigned && ImmValue.isSignedIntN(ImmBitSize)) { ++ SplatVal = CurDAG->getTargetConstant(ImmValue.getSExtValue(), SDLoc(N), ++ Subtarget->getGRLenVT()); ++ return true; ++ } ++ if (!IsSigned && ImmValue.isIntN(ImmBitSize)) { ++ SplatVal = CurDAG->getTargetConstant(ImmValue.getZExtValue(), SDLoc(N), ++ Subtarget->getGRLenVT()); ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++bool LoongArchDAGToDAGISel::selectVSplatUimmInvPow2(SDValue N, ++ SDValue &SplatImm) const { ++ APInt ImmValue; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0); ++ ++ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && ++ ImmValue.getBitWidth() == EltTy.getSizeInBits()) { ++ int32_t Log2 = (~ImmValue).exactLogBase2(); ++ ++ if (Log2 != -1) { ++ SplatImm = CurDAG->getTargetConstant(Log2, SDLoc(N), EltTy); ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++bool LoongArchDAGToDAGISel::selectVSplatUimmPow2(SDValue N, ++ SDValue &SplatImm) const { ++ APInt ImmValue; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0); ++ ++ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && ++ ImmValue.getBitWidth() == EltTy.getSizeInBits()) { ++ int32_t Log2 = ImmValue.exactLogBase2(); ++ ++ if (Log2 != -1) { ++ SplatImm = CurDAG->getTargetConstant(Log2, SDLoc(N), EltTy); ++ return true; ++ } ++ } ++ ++ return false; ++} ++ + // This pass converts a legalized DAG into a LoongArch-specific DAG, ready + // for instruction scheduling. 
+ FunctionPass *llvm::createLoongArchISelDag(LoongArchTargetMachine &TM) {
+diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
+index 3099407aea3e..5e3d6ccc3755 100644
+--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
++++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
+@@ -56,6 +56,14 @@ public:
+   bool selectSExti32(SDValue N, SDValue &Val);
+   bool selectZExti32(SDValue N, SDValue &Val);
+
++  bool selectVSplat(SDNode *N, APInt &Imm, unsigned MinSizeInBits) const;
++
++  template <unsigned ImmBitSize, bool IsSigned = false>
++  bool selectVSplatImm(SDValue N, SDValue &SplatVal);
++
++  bool selectVSplatUimmInvPow2(SDValue N, SDValue &SplatImm) const;
++  bool selectVSplatUimmPow2(SDValue N, SDValue &SplatImm) const;
++
+   // Include the pieces autogenerated from the target description.
+ #include "LoongArchGenDAGISel.inc"
+ };
+diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+index db5961fc501a..c05133647929 100644
+--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+@@ -62,6 +62,13 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
+                    MVT::v4i64})
+       addRegisterClass(VT, &LoongArch::LASX256RegClass);
+
++  static const MVT::SimpleValueType LSXVTs[] = {
++      MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
++
++  if (Subtarget.hasExtLSX())
++    for (MVT VT : LSXVTs)
++      addRegisterClass(VT, &LoongArch::LSX128RegClass);
++
+   setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
+                    MVT::i1, Promote);
+
+@@ -109,6 +116,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
+     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+     setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom);
+     setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom);
++    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
+     if (Subtarget.hasBasicF() && !Subtarget.hasBasicD())
+       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+     if (Subtarget.hasBasicF())
+@@ -138,6 +146,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
+     setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
+     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+     setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom);
++    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
+   }
+
+   static const ISD::CondCode FPCCToExpand[] = {
+@@ -194,6 +203,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
+     setOperationAction(ISD::UINT_TO_FP, GRLenVT, Custom);
+   }
+
++  if (Subtarget.hasExtLSX())
++    setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN},
++                       {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8}, Legal);
++
+   // Compute derived properties from the register classes.
+   computeRegisterProperties(Subtarget.getRegisterInfo());
+
+@@ -215,6 +228,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
+   setTargetDAGCombine(ISD::AND);
+   setTargetDAGCombine(ISD::OR);
+   setTargetDAGCombine(ISD::SRL);
++  if (Subtarget.hasExtLSX())
++    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
+ }
+
+ bool LoongArchTargetLowering::isOffsetFoldingLegal(
+@@ -652,9 +667,24 @@ LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
+   return Addr;
+ }
+
++template <unsigned N>
++static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
++                                    SelectionDAG &DAG, bool IsSigned = false) {
++  auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
++  // Check the ImmArg.
++  if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
++      (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
++    DAG.getContext()->emitError(Op->getOperationName(0) +
++                                ": argument out of range.");
++    return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
++  }
++  return SDValue();
++}
++
+ SDValue
+ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
+                                                  SelectionDAG &DAG) const {
++  SDLoc DL(Op);
+   switch (Op.getConstantOperandVal(0)) {
+   default:
+     return SDValue(); // Don't custom lower most intrinsics.
+@@ -662,6 +692,141 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN,
+     EVT PtrVT = getPointerTy(DAG.getDataLayout());
+     return DAG.getRegister(LoongArch::R2, PtrVT);
+   }
++  case Intrinsic::loongarch_lsx_vpickve2gr_d:
++  case Intrinsic::loongarch_lsx_vpickve2gr_du:
++  case Intrinsic::loongarch_lsx_vreplvei_d:
++    return checkIntrinsicImmArg<1>(Op, 2, DAG);
++  case Intrinsic::loongarch_lsx_vreplvei_w:
++    return checkIntrinsicImmArg<2>(Op, 2, DAG);
++  case Intrinsic::loongarch_lsx_vsat_b:
++  case Intrinsic::loongarch_lsx_vsat_bu:
++  case Intrinsic::loongarch_lsx_vrotri_b:
++  case Intrinsic::loongarch_lsx_vsllwil_h_b:
++  case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
++  case Intrinsic::loongarch_lsx_vsrlri_b:
++  case Intrinsic::loongarch_lsx_vsrari_b:
++  case Intrinsic::loongarch_lsx_vreplvei_h:
++    return checkIntrinsicImmArg<3>(Op, 2, DAG);
++  case Intrinsic::loongarch_lsx_vsat_h:
++  case Intrinsic::loongarch_lsx_vsat_hu:
++  case Intrinsic::loongarch_lsx_vrotri_h:
++  case Intrinsic::loongarch_lsx_vsllwil_w_h:
++  case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
++  case Intrinsic::loongarch_lsx_vsrlri_h:
++  case Intrinsic::loongarch_lsx_vsrari_h:
++  case Intrinsic::loongarch_lsx_vreplvei_b:
++    return checkIntrinsicImmArg<4>(Op, 2, DAG);
++  case Intrinsic::loongarch_lsx_vsrlni_b_h:
++  case Intrinsic::loongarch_lsx_vsrani_b_h:
++  case Intrinsic::loongarch_lsx_vsrlrni_b_h:
++  case Intrinsic::loongarch_lsx_vsrarni_b_h:
++  case Intrinsic::loongarch_lsx_vssrlni_b_h:
++  case Intrinsic::loongarch_lsx_vssrani_b_h:
++  case Intrinsic::loongarch_lsx_vssrlni_bu_h:
++  case Intrinsic::loongarch_lsx_vssrani_bu_h:
++  case Intrinsic::loongarch_lsx_vssrlrni_b_h:
++  case Intrinsic::loongarch_lsx_vssrarni_b_h:
++  case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
++  case Intrinsic::loongarch_lsx_vssrarni_bu_h:
++    return checkIntrinsicImmArg<4>(Op, 3, DAG);
++  case Intrinsic::loongarch_lsx_vsat_w:
++  case Intrinsic::loongarch_lsx_vsat_wu:
++  case Intrinsic::loongarch_lsx_vrotri_w:
++  case Intrinsic::loongarch_lsx_vsllwil_d_w:
++  case Intrinsic::loongarch_lsx_vsllwil_du_wu:
++  case Intrinsic::loongarch_lsx_vsrlri_w:
++  case Intrinsic::loongarch_lsx_vsrari_w:
++  case Intrinsic::loongarch_lsx_vslei_bu:
++  case Intrinsic::loongarch_lsx_vslei_hu:
++  case Intrinsic::loongarch_lsx_vslei_wu:
++  case Intrinsic::loongarch_lsx_vslei_du:
++
case Intrinsic::loongarch_lsx_vslti_bu: ++ case Intrinsic::loongarch_lsx_vslti_hu: ++ case Intrinsic::loongarch_lsx_vslti_wu: ++ case Intrinsic::loongarch_lsx_vslti_du: ++ case Intrinsic::loongarch_lsx_vbsll_v: ++ case Intrinsic::loongarch_lsx_vbsrl_v: ++ return checkIntrinsicImmArg<5>(Op, 2, DAG); ++ case Intrinsic::loongarch_lsx_vseqi_b: ++ case Intrinsic::loongarch_lsx_vseqi_h: ++ case Intrinsic::loongarch_lsx_vseqi_w: ++ case Intrinsic::loongarch_lsx_vseqi_d: ++ case Intrinsic::loongarch_lsx_vslei_b: ++ case Intrinsic::loongarch_lsx_vslei_h: ++ case Intrinsic::loongarch_lsx_vslei_w: ++ case Intrinsic::loongarch_lsx_vslei_d: ++ case Intrinsic::loongarch_lsx_vslti_b: ++ case Intrinsic::loongarch_lsx_vslti_h: ++ case Intrinsic::loongarch_lsx_vslti_w: ++ case Intrinsic::loongarch_lsx_vslti_d: ++ return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true); ++ case Intrinsic::loongarch_lsx_vsrlni_h_w: ++ case Intrinsic::loongarch_lsx_vsrani_h_w: ++ case Intrinsic::loongarch_lsx_vsrlrni_h_w: ++ case Intrinsic::loongarch_lsx_vsrarni_h_w: ++ case Intrinsic::loongarch_lsx_vssrlni_h_w: ++ case Intrinsic::loongarch_lsx_vssrani_h_w: ++ case Intrinsic::loongarch_lsx_vssrlni_hu_w: ++ case Intrinsic::loongarch_lsx_vssrani_hu_w: ++ case Intrinsic::loongarch_lsx_vssrlrni_h_w: ++ case Intrinsic::loongarch_lsx_vssrarni_h_w: ++ case Intrinsic::loongarch_lsx_vssrlrni_hu_w: ++ case Intrinsic::loongarch_lsx_vssrarni_hu_w: ++ case Intrinsic::loongarch_lsx_vfrstpi_b: ++ case Intrinsic::loongarch_lsx_vfrstpi_h: ++ return checkIntrinsicImmArg<5>(Op, 3, DAG); ++ case Intrinsic::loongarch_lsx_vsat_d: ++ case Intrinsic::loongarch_lsx_vsat_du: ++ case Intrinsic::loongarch_lsx_vrotri_d: ++ case Intrinsic::loongarch_lsx_vsrlri_d: ++ case Intrinsic::loongarch_lsx_vsrari_d: ++ return checkIntrinsicImmArg<6>(Op, 2, DAG); ++ case Intrinsic::loongarch_lsx_vsrlni_w_d: ++ case Intrinsic::loongarch_lsx_vsrani_w_d: ++ case Intrinsic::loongarch_lsx_vsrlrni_w_d: ++ case Intrinsic::loongarch_lsx_vsrarni_w_d: ++ case Intrinsic::loongarch_lsx_vssrlni_w_d: ++ case Intrinsic::loongarch_lsx_vssrani_w_d: ++ case Intrinsic::loongarch_lsx_vssrlni_wu_d: ++ case Intrinsic::loongarch_lsx_vssrani_wu_d: ++ case Intrinsic::loongarch_lsx_vssrlrni_w_d: ++ case Intrinsic::loongarch_lsx_vssrarni_w_d: ++ case Intrinsic::loongarch_lsx_vssrlrni_wu_d: ++ case Intrinsic::loongarch_lsx_vssrarni_wu_d: ++ return checkIntrinsicImmArg<6>(Op, 3, DAG); ++ case Intrinsic::loongarch_lsx_vsrlni_d_q: ++ case Intrinsic::loongarch_lsx_vsrani_d_q: ++ case Intrinsic::loongarch_lsx_vsrlrni_d_q: ++ case Intrinsic::loongarch_lsx_vsrarni_d_q: ++ case Intrinsic::loongarch_lsx_vssrlni_d_q: ++ case Intrinsic::loongarch_lsx_vssrani_d_q: ++ case Intrinsic::loongarch_lsx_vssrlni_du_q: ++ case Intrinsic::loongarch_lsx_vssrani_du_q: ++ case Intrinsic::loongarch_lsx_vssrlrni_d_q: ++ case Intrinsic::loongarch_lsx_vssrarni_d_q: ++ case Intrinsic::loongarch_lsx_vssrlrni_du_q: ++ case Intrinsic::loongarch_lsx_vssrarni_du_q: ++ return checkIntrinsicImmArg<7>(Op, 3, DAG); ++ case Intrinsic::loongarch_lsx_vnori_b: ++ case Intrinsic::loongarch_lsx_vshuf4i_b: ++ case Intrinsic::loongarch_lsx_vshuf4i_h: ++ case Intrinsic::loongarch_lsx_vshuf4i_w: ++ return checkIntrinsicImmArg<8>(Op, 2, DAG); ++ case Intrinsic::loongarch_lsx_vshuf4i_d: ++ case Intrinsic::loongarch_lsx_vpermi_w: ++ case Intrinsic::loongarch_lsx_vbitseli_b: ++ case Intrinsic::loongarch_lsx_vextrins_b: ++ case Intrinsic::loongarch_lsx_vextrins_h: ++ case Intrinsic::loongarch_lsx_vextrins_w: ++ case 
Intrinsic::loongarch_lsx_vextrins_d: ++ return checkIntrinsicImmArg<8>(Op, 3, DAG); ++ case Intrinsic::loongarch_lsx_vrepli_b: ++ case Intrinsic::loongarch_lsx_vrepli_h: ++ case Intrinsic::loongarch_lsx_vrepli_w: ++ case Intrinsic::loongarch_lsx_vrepli_d: ++ return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true); ++ case Intrinsic::loongarch_lsx_vldi: ++ return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true); + } + } + +@@ -757,6 +922,29 @@ LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, + : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other}, + {Chain, DAG.getConstant(Imm, DL, GRLenVT)}); + } ++ case Intrinsic::loongarch_lsx_vld: ++ case Intrinsic::loongarch_lsx_vldrepl_b: ++ return !isInt<12>(cast(Op.getOperand(3))->getSExtValue()) ++ ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) ++ : SDValue(); ++ case Intrinsic::loongarch_lsx_vldrepl_h: ++ return !isShiftedInt<11, 1>( ++ cast(Op.getOperand(3))->getSExtValue()) ++ ? emitIntrinsicWithChainErrorMessage( ++ Op, "argument out of range or not a multiple of 2", DAG) ++ : SDValue(); ++ case Intrinsic::loongarch_lsx_vldrepl_w: ++ return !isShiftedInt<10, 2>( ++ cast(Op.getOperand(3))->getSExtValue()) ++ ? emitIntrinsicWithChainErrorMessage( ++ Op, "argument out of range or not a multiple of 4", DAG) ++ : SDValue(); ++ case Intrinsic::loongarch_lsx_vldrepl_d: ++ return !isShiftedInt<9, 3>( ++ cast(Op.getOperand(3))->getSExtValue()) ++ ? emitIntrinsicWithChainErrorMessage( ++ Op, "argument out of range or not a multiple of 8", DAG) ++ : SDValue(); + } + } + +@@ -875,6 +1063,36 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, + : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) + : Op; + } ++ case Intrinsic::loongarch_lsx_vst: ++ return !isInt<12>(cast(Op.getOperand(4))->getSExtValue()) ++ ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) ++ : SDValue(); ++ case Intrinsic::loongarch_lsx_vstelm_b: ++ return (!isInt<8>(cast(Op.getOperand(4))->getSExtValue()) || ++ !isUInt<4>(cast(Op.getOperand(5))->getZExtValue())) ++ ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) ++ : SDValue(); ++ case Intrinsic::loongarch_lsx_vstelm_h: ++ return (!isShiftedInt<8, 1>( ++ cast(Op.getOperand(4))->getSExtValue()) || ++ !isUInt<3>(cast(Op.getOperand(5))->getZExtValue())) ++ ? emitIntrinsicErrorMessage( ++ Op, "argument out of range or not a multiple of 2", DAG) ++ : SDValue(); ++ case Intrinsic::loongarch_lsx_vstelm_w: ++ return (!isShiftedInt<8, 2>( ++ cast(Op.getOperand(4))->getSExtValue()) || ++ !isUInt<2>(cast(Op.getOperand(5))->getZExtValue())) ++ ? emitIntrinsicErrorMessage( ++ Op, "argument out of range or not a multiple of 4", DAG) ++ : SDValue(); ++ case Intrinsic::loongarch_lsx_vstelm_d: ++ return (!isShiftedInt<8, 3>( ++ cast(Op.getOperand(4))->getSExtValue()) || ++ !isUInt<1>(cast(Op.getOperand(5))->getZExtValue())) ++ ? emitIntrinsicErrorMessage( ++ Op, "argument out of range or not a multiple of 8", DAG) ++ : SDValue(); + } + } + +@@ -1026,16 +1244,110 @@ static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, + return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes); + } + +-// Helper function that emits error message for intrinsics with chain and return +-// a UNDEF and the chain as the results. +-static void emitErrorAndReplaceIntrinsicWithChainResults( ++// Helper function that emits error message for intrinsics with/without chain ++// and return a UNDEF or and the chain as the results. 
++static void emitErrorAndReplaceIntrinsicResults( + SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG, +- StringRef ErrorMsg) { ++ StringRef ErrorMsg, bool WithChain = true) { + DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + "."); + Results.push_back(DAG.getUNDEF(N->getValueType(0))); ++ if (!WithChain) ++ return; + Results.push_back(N->getOperand(0)); + } + ++template ++static void ++replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl &Results, ++ SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, ++ unsigned ResOp) { ++ const StringRef ErrorMsgOOR = "argument out of range"; ++ unsigned Imm = cast(Node->getOperand(2))->getZExtValue(); ++ if (!isUInt(Imm)) { ++ emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR, ++ /*WithChain=*/false); ++ return; ++ } ++ SDLoc DL(Node); ++ SDValue Vec = Node->getOperand(1); ++ ++ SDValue PickElt = ++ DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec, ++ DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()), ++ DAG.getValueType(Vec.getValueType().getVectorElementType())); ++ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0), ++ PickElt.getValue(0))); ++} ++ ++static void replaceVecCondBranchResults(SDNode *N, ++ SmallVectorImpl &Results, ++ SelectionDAG &DAG, ++ const LoongArchSubtarget &Subtarget, ++ unsigned ResOp) { ++ SDLoc DL(N); ++ SDValue Vec = N->getOperand(1); ++ ++ SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec); ++ Results.push_back( ++ DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0))); ++} ++ ++static void ++replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl &Results, ++ SelectionDAG &DAG, ++ const LoongArchSubtarget &Subtarget) { ++ switch (N->getConstantOperandVal(0)) { ++ default: ++ llvm_unreachable("Unexpected Intrinsic."); ++ case Intrinsic::loongarch_lsx_vpickve2gr_b: ++ replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget, ++ LoongArchISD::VPICK_SEXT_ELT); ++ break; ++ case Intrinsic::loongarch_lsx_vpickve2gr_h: ++ replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget, ++ LoongArchISD::VPICK_SEXT_ELT); ++ break; ++ case Intrinsic::loongarch_lsx_vpickve2gr_w: ++ replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget, ++ LoongArchISD::VPICK_SEXT_ELT); ++ break; ++ case Intrinsic::loongarch_lsx_vpickve2gr_bu: ++ replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget, ++ LoongArchISD::VPICK_ZEXT_ELT); ++ break; ++ case Intrinsic::loongarch_lsx_vpickve2gr_hu: ++ replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget, ++ LoongArchISD::VPICK_ZEXT_ELT); ++ break; ++ case Intrinsic::loongarch_lsx_vpickve2gr_wu: ++ replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget, ++ LoongArchISD::VPICK_ZEXT_ELT); ++ break; ++ case Intrinsic::loongarch_lsx_bz_b: ++ case Intrinsic::loongarch_lsx_bz_h: ++ case Intrinsic::loongarch_lsx_bz_w: ++ case Intrinsic::loongarch_lsx_bz_d: ++ replaceVecCondBranchResults(N, Results, DAG, Subtarget, ++ LoongArchISD::VALL_ZERO); ++ break; ++ case Intrinsic::loongarch_lsx_bz_v: ++ replaceVecCondBranchResults(N, Results, DAG, Subtarget, ++ LoongArchISD::VANY_ZERO); ++ break; ++ case Intrinsic::loongarch_lsx_bnz_b: ++ case Intrinsic::loongarch_lsx_bnz_h: ++ case Intrinsic::loongarch_lsx_bnz_w: ++ case Intrinsic::loongarch_lsx_bnz_d: ++ replaceVecCondBranchResults(N, Results, DAG, Subtarget, ++ LoongArchISD::VALL_NONZERO); ++ break; ++ case Intrinsic::loongarch_lsx_bnz_v: ++ replaceVecCondBranchResults(N, Results, DAG, Subtarget, ++ LoongArchISD::VANY_NONZERO); ++ break; ++ } ++} ++ + void 
LoongArchTargetLowering::ReplaceNodeResults( + SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { + SDLoc DL(N); +@@ -1168,14 +1480,12 @@ void LoongArchTargetLowering::ReplaceNodeResults( + llvm_unreachable("Unexpected Intrinsic."); + case Intrinsic::loongarch_movfcsr2gr: { + if (!Subtarget.hasBasicF()) { +- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, +- ErrorMsgReqF); ++ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF); + return; + } + unsigned Imm = cast(Op2)->getZExtValue(); + if (!isUInt<2>(Imm)) { +- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, +- ErrorMsgOOR); ++ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); + return; + } + SDValue MOVFCSR2GRResults = DAG.getNode( +@@ -1211,7 +1521,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( + {Chain, Op2, \ + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \ + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \ +- Results.push_back(NODE.getValue(1)); \ ++ Results.push_back(NODE.getValue(1)); \ + break; \ + } + CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W) +@@ -1220,8 +1530,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( + #define CSR_CASE(ID) \ + case Intrinsic::loongarch_##ID: { \ + if (!Subtarget.is64Bit()) \ +- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, \ +- ErrorMsgReqLA64); \ ++ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \ + break; \ + } + CSR_CASE(csrrd_d); +@@ -1232,8 +1541,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( + case Intrinsic::loongarch_csrrd_w: { + unsigned Imm = cast(Op2)->getZExtValue(); + if (!isUInt<14>(Imm)) { +- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, +- ErrorMsgOOR); ++ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); + return; + } + SDValue CSRRDResults = +@@ -1247,8 +1555,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( + case Intrinsic::loongarch_csrwr_w: { + unsigned Imm = cast(N->getOperand(3))->getZExtValue(); + if (!isUInt<14>(Imm)) { +- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, +- ErrorMsgOOR); ++ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); + return; + } + SDValue CSRWRResults = +@@ -1263,8 +1570,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( + case Intrinsic::loongarch_csrxchg_w: { + unsigned Imm = cast(N->getOperand(4))->getZExtValue(); + if (!isUInt<14>(Imm)) { +- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, +- ErrorMsgOOR); ++ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); + return; + } + SDValue CSRXCHGResults = DAG.getNode( +@@ -1302,8 +1608,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( + } + case Intrinsic::loongarch_lddir_d: { + if (!Subtarget.is64Bit()) { +- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, +- ErrorMsgReqLA64); ++ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); + return; + } + break; +@@ -1322,6 +1627,10 @@ void LoongArchTargetLowering::ReplaceNodeResults( + Results.push_back(N->getOperand(0)); + break; + } ++ case ISD::INTRINSIC_WO_CHAIN: { ++ replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget); ++ break; ++ } + } + } + +@@ -1685,6 +1994,440 @@ static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, + Src.getOperand(0)); + } + ++template ++static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, ++ SelectionDAG &DAG, ++ const LoongArchSubtarget &Subtarget, ++ bool IsSigned = false) { ++ 
SDLoc DL(Node);
++  auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
++  // Check the ImmArg.
++  if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
++      (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
++    DAG.getContext()->emitError(Node->getOperationName(0) +
++                                ": argument out of range.");
++    return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
++  }
++  return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
++}
++
++template <unsigned N>
++static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
++                                   SelectionDAG &DAG, bool IsSigned = false) {
++  SDLoc DL(Node);
++  EVT ResTy = Node->getValueType(0);
++  auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
++
++  // Check the ImmArg.
++  if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
++      (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
++    DAG.getContext()->emitError(Node->getOperationName(0) +
++                                ": argument out of range.");
++    return DAG.getNode(ISD::UNDEF, DL, ResTy);
++  }
++  return DAG.getConstant(
++      APInt(ResTy.getScalarType().getSizeInBits(),
++            IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
++      DL, ResTy);
++}
++
++static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
++  SDLoc DL(Node);
++  EVT ResTy = Node->getValueType(0);
++  SDValue Vec = Node->getOperand(2);
++  SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
++  return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
++}
++
++static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
++  SDLoc DL(Node);
++  EVT ResTy = Node->getValueType(0);
++  SDValue One = DAG.getConstant(1, DL, ResTy);
++  SDValue Bit =
++      DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
++
++  return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
++                     DAG.getNOT(DL, Bit, ResTy));
++}
++
++template <unsigned N>
++static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
++  SDLoc DL(Node);
++  EVT ResTy = Node->getValueType(0);
++  auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
++  // Check the unsigned ImmArg.
++  if (!isUInt<N>(CImm->getZExtValue())) {
++    DAG.getContext()->emitError(Node->getOperationName(0) +
++                                ": argument out of range.");
++    return DAG.getNode(ISD::UNDEF, DL, ResTy);
++  }
++
++  APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
++  SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
++
++  return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
++}
++
++template <unsigned N>
++static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
++  SDLoc DL(Node);
++  EVT ResTy = Node->getValueType(0);
++  auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
++  // Check the unsigned ImmArg.
++  if (!isUInt<N>(CImm->getZExtValue())) {
++    DAG.getContext()->emitError(Node->getOperationName(0) +
++                                ": argument out of range.");
++    return DAG.getNode(ISD::UNDEF, DL, ResTy);
++  }
++
++  APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
++  SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
++  return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
++}
++
++template <unsigned N>
++static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
++  SDLoc DL(Node);
++  EVT ResTy = Node->getValueType(0);
++  auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
++  // Check the unsigned ImmArg.
++ if (!isUInt(CImm->getZExtValue())) { ++ DAG.getContext()->emitError(Node->getOperationName(0) + ++ ": argument out of range."); ++ return DAG.getNode(ISD::UNDEF, DL, ResTy); ++ } ++ ++ APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); ++ SDValue BitImm = DAG.getConstant(Imm, DL, ResTy); ++ return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm); ++} ++ ++static SDValue ++performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const LoongArchSubtarget &Subtarget) { ++ SDLoc DL(N); ++ switch (N->getConstantOperandVal(0)) { ++ default: ++ break; ++ case Intrinsic::loongarch_lsx_vadd_b: ++ case Intrinsic::loongarch_lsx_vadd_h: ++ case Intrinsic::loongarch_lsx_vadd_w: ++ case Intrinsic::loongarch_lsx_vadd_d: ++ return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vaddi_bu: ++ case Intrinsic::loongarch_lsx_vaddi_hu: ++ case Intrinsic::loongarch_lsx_vaddi_wu: ++ case Intrinsic::loongarch_lsx_vaddi_du: ++ return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<5>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vsub_b: ++ case Intrinsic::loongarch_lsx_vsub_h: ++ case Intrinsic::loongarch_lsx_vsub_w: ++ case Intrinsic::loongarch_lsx_vsub_d: ++ return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vsubi_bu: ++ case Intrinsic::loongarch_lsx_vsubi_hu: ++ case Intrinsic::loongarch_lsx_vsubi_wu: ++ case Intrinsic::loongarch_lsx_vsubi_du: ++ return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<5>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vneg_b: ++ case Intrinsic::loongarch_lsx_vneg_h: ++ case Intrinsic::loongarch_lsx_vneg_w: ++ case Intrinsic::loongarch_lsx_vneg_d: ++ return DAG.getNode( ++ ISD::SUB, DL, N->getValueType(0), ++ DAG.getConstant( ++ APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0, ++ /*isSigned=*/true), ++ SDLoc(N), N->getValueType(0)), ++ N->getOperand(1)); ++ case Intrinsic::loongarch_lsx_vmax_b: ++ case Intrinsic::loongarch_lsx_vmax_h: ++ case Intrinsic::loongarch_lsx_vmax_w: ++ case Intrinsic::loongarch_lsx_vmax_d: ++ return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmax_bu: ++ case Intrinsic::loongarch_lsx_vmax_hu: ++ case Intrinsic::loongarch_lsx_vmax_wu: ++ case Intrinsic::loongarch_lsx_vmax_du: ++ return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmaxi_b: ++ case Intrinsic::loongarch_lsx_vmaxi_h: ++ case Intrinsic::loongarch_lsx_vmaxi_w: ++ case Intrinsic::loongarch_lsx_vmaxi_d: ++ return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true)); ++ case Intrinsic::loongarch_lsx_vmaxi_bu: ++ case Intrinsic::loongarch_lsx_vmaxi_hu: ++ case Intrinsic::loongarch_lsx_vmaxi_wu: ++ case Intrinsic::loongarch_lsx_vmaxi_du: ++ return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<5>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vmin_b: ++ case Intrinsic::loongarch_lsx_vmin_h: ++ case Intrinsic::loongarch_lsx_vmin_w: ++ case Intrinsic::loongarch_lsx_vmin_d: ++ return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmin_bu: ++ case 
Intrinsic::loongarch_lsx_vmin_hu: ++ case Intrinsic::loongarch_lsx_vmin_wu: ++ case Intrinsic::loongarch_lsx_vmin_du: ++ return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmini_b: ++ case Intrinsic::loongarch_lsx_vmini_h: ++ case Intrinsic::loongarch_lsx_vmini_w: ++ case Intrinsic::loongarch_lsx_vmini_d: ++ return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true)); ++ case Intrinsic::loongarch_lsx_vmini_bu: ++ case Intrinsic::loongarch_lsx_vmini_hu: ++ case Intrinsic::loongarch_lsx_vmini_wu: ++ case Intrinsic::loongarch_lsx_vmini_du: ++ return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<5>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vmul_b: ++ case Intrinsic::loongarch_lsx_vmul_h: ++ case Intrinsic::loongarch_lsx_vmul_w: ++ case Intrinsic::loongarch_lsx_vmul_d: ++ return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmadd_b: ++ case Intrinsic::loongarch_lsx_vmadd_h: ++ case Intrinsic::loongarch_lsx_vmadd_w: ++ case Intrinsic::loongarch_lsx_vmadd_d: { ++ EVT ResTy = N->getValueType(0); ++ return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1), ++ DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2), ++ N->getOperand(3))); ++ } ++ case Intrinsic::loongarch_lsx_vmsub_b: ++ case Intrinsic::loongarch_lsx_vmsub_h: ++ case Intrinsic::loongarch_lsx_vmsub_w: ++ case Intrinsic::loongarch_lsx_vmsub_d: { ++ EVT ResTy = N->getValueType(0); ++ return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1), ++ DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2), ++ N->getOperand(3))); ++ } ++ case Intrinsic::loongarch_lsx_vdiv_b: ++ case Intrinsic::loongarch_lsx_vdiv_h: ++ case Intrinsic::loongarch_lsx_vdiv_w: ++ case Intrinsic::loongarch_lsx_vdiv_d: ++ return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vdiv_bu: ++ case Intrinsic::loongarch_lsx_vdiv_hu: ++ case Intrinsic::loongarch_lsx_vdiv_wu: ++ case Intrinsic::loongarch_lsx_vdiv_du: ++ return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmod_b: ++ case Intrinsic::loongarch_lsx_vmod_h: ++ case Intrinsic::loongarch_lsx_vmod_w: ++ case Intrinsic::loongarch_lsx_vmod_d: ++ return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmod_bu: ++ case Intrinsic::loongarch_lsx_vmod_hu: ++ case Intrinsic::loongarch_lsx_vmod_wu: ++ case Intrinsic::loongarch_lsx_vmod_du: ++ return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vand_v: ++ return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vor_v: ++ return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vxor_v: ++ return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vnor_v: { ++ SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ return DAG.getNOT(DL, Res, Res->getValueType(0)); ++ } ++ case Intrinsic::loongarch_lsx_vandi_b: ++ return DAG.getNode(ISD::AND, DL, N->getValueType(0), 
N->getOperand(1), ++ lowerVectorSplatImm<8>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vori_b: ++ return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<8>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vxori_b: ++ return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<8>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vsll_b: ++ case Intrinsic::loongarch_lsx_vsll_h: ++ case Intrinsic::loongarch_lsx_vsll_w: ++ case Intrinsic::loongarch_lsx_vsll_d: ++ return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), ++ truncateVecElts(N, DAG)); ++ case Intrinsic::loongarch_lsx_vslli_b: ++ return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<3>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vslli_h: ++ return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<4>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vslli_w: ++ return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<5>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vslli_d: ++ return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<6>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vsrl_b: ++ case Intrinsic::loongarch_lsx_vsrl_h: ++ case Intrinsic::loongarch_lsx_vsrl_w: ++ case Intrinsic::loongarch_lsx_vsrl_d: ++ return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), ++ truncateVecElts(N, DAG)); ++ case Intrinsic::loongarch_lsx_vsrli_b: ++ return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<3>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vsrli_h: ++ return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<4>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vsrli_w: ++ return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<5>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vsrli_d: ++ return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<6>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vsra_b: ++ case Intrinsic::loongarch_lsx_vsra_h: ++ case Intrinsic::loongarch_lsx_vsra_w: ++ case Intrinsic::loongarch_lsx_vsra_d: ++ return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), ++ truncateVecElts(N, DAG)); ++ case Intrinsic::loongarch_lsx_vsrai_b: ++ return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<3>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vsrai_h: ++ return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<4>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vsrai_w: ++ return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<5>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vsrai_d: ++ return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<6>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vpcnt_b: ++ case Intrinsic::loongarch_lsx_vpcnt_h: ++ case Intrinsic::loongarch_lsx_vpcnt_w: ++ case Intrinsic::loongarch_lsx_vpcnt_d: ++ return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1)); ++ case Intrinsic::loongarch_lsx_vbitclr_b: ++ case Intrinsic::loongarch_lsx_vbitclr_h: ++ case Intrinsic::loongarch_lsx_vbitclr_w: ++ case Intrinsic::loongarch_lsx_vbitclr_d: ++ return lowerVectorBitClear(N, DAG); ++ case Intrinsic::loongarch_lsx_vbitclri_b: ++ return 
lowerVectorBitClearImm<3>(N, DAG); ++ case Intrinsic::loongarch_lsx_vbitclri_h: ++ return lowerVectorBitClearImm<4>(N, DAG); ++ case Intrinsic::loongarch_lsx_vbitclri_w: ++ return lowerVectorBitClearImm<5>(N, DAG); ++ case Intrinsic::loongarch_lsx_vbitclri_d: ++ return lowerVectorBitClearImm<6>(N, DAG); ++ case Intrinsic::loongarch_lsx_vbitset_b: ++ case Intrinsic::loongarch_lsx_vbitset_h: ++ case Intrinsic::loongarch_lsx_vbitset_w: ++ case Intrinsic::loongarch_lsx_vbitset_d: { ++ EVT VecTy = N->getValueType(0); ++ SDValue One = DAG.getConstant(1, DL, VecTy); ++ return DAG.getNode( ++ ISD::OR, DL, VecTy, N->getOperand(1), ++ DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG))); ++ } ++ case Intrinsic::loongarch_lsx_vbitseti_b: ++ return lowerVectorBitSetImm<3>(N, DAG); ++ case Intrinsic::loongarch_lsx_vbitseti_h: ++ return lowerVectorBitSetImm<4>(N, DAG); ++ case Intrinsic::loongarch_lsx_vbitseti_w: ++ return lowerVectorBitSetImm<5>(N, DAG); ++ case Intrinsic::loongarch_lsx_vbitseti_d: ++ return lowerVectorBitSetImm<6>(N, DAG); ++ case Intrinsic::loongarch_lsx_vbitrev_b: ++ case Intrinsic::loongarch_lsx_vbitrev_h: ++ case Intrinsic::loongarch_lsx_vbitrev_w: ++ case Intrinsic::loongarch_lsx_vbitrev_d: { ++ EVT VecTy = N->getValueType(0); ++ SDValue One = DAG.getConstant(1, DL, VecTy); ++ return DAG.getNode( ++ ISD::XOR, DL, VecTy, N->getOperand(1), ++ DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG))); ++ } ++ case Intrinsic::loongarch_lsx_vbitrevi_b: ++ return lowerVectorBitRevImm<3>(N, DAG); ++ case Intrinsic::loongarch_lsx_vbitrevi_h: ++ return lowerVectorBitRevImm<4>(N, DAG); ++ case Intrinsic::loongarch_lsx_vbitrevi_w: ++ return lowerVectorBitRevImm<5>(N, DAG); ++ case Intrinsic::loongarch_lsx_vbitrevi_d: ++ return lowerVectorBitRevImm<6>(N, DAG); ++ case Intrinsic::loongarch_lsx_vfadd_s: ++ case Intrinsic::loongarch_lsx_vfadd_d: ++ return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vfsub_s: ++ case Intrinsic::loongarch_lsx_vfsub_d: ++ return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vfmul_s: ++ case Intrinsic::loongarch_lsx_vfmul_d: ++ return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vfdiv_s: ++ case Intrinsic::loongarch_lsx_vfdiv_d: ++ return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vfmadd_s: ++ case Intrinsic::loongarch_lsx_vfmadd_d: ++ return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2), N->getOperand(3)); ++ case Intrinsic::loongarch_lsx_vinsgr2vr_b: ++ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), ++ N->getOperand(1), N->getOperand(2), ++ legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget)); ++ case Intrinsic::loongarch_lsx_vinsgr2vr_h: ++ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), ++ N->getOperand(1), N->getOperand(2), ++ legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget)); ++ case Intrinsic::loongarch_lsx_vinsgr2vr_w: ++ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), ++ N->getOperand(1), N->getOperand(2), ++ legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget)); ++ case Intrinsic::loongarch_lsx_vinsgr2vr_d: ++ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), ++ N->getOperand(1), N->getOperand(2), ++ 
legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget)); ++ case Intrinsic::loongarch_lsx_vreplgr2vr_b: ++ case Intrinsic::loongarch_lsx_vreplgr2vr_h: ++ case Intrinsic::loongarch_lsx_vreplgr2vr_w: ++ case Intrinsic::loongarch_lsx_vreplgr2vr_d: { ++ EVT ResTy = N->getValueType(0); ++ SmallVector Ops(ResTy.getVectorNumElements(), N->getOperand(1)); ++ return DAG.getBuildVector(ResTy, DL, Ops); ++ } ++ case Intrinsic::loongarch_lsx_vreplve_b: ++ case Intrinsic::loongarch_lsx_vreplve_h: ++ case Intrinsic::loongarch_lsx_vreplve_w: ++ case Intrinsic::loongarch_lsx_vreplve_d: ++ return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0), ++ N->getOperand(1), ++ DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(), ++ N->getOperand(2))); ++ } ++ return SDValue(); ++} ++ + SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; +@@ -1699,6 +2442,8 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, + return performSRLCombine(N, DAG, DCI, Subtarget); + case LoongArchISD::BITREV_W: + return performBITREV_WCombine(N, DAG, DCI, Subtarget); ++ case ISD::INTRINSIC_WO_CHAIN: ++ return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget); + } + return SDValue(); + } +@@ -1752,6 +2497,101 @@ static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI, + return SinkMBB; + } + ++static MachineBasicBlock * ++emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, ++ const LoongArchSubtarget &Subtarget) { ++ unsigned CondOpc; ++ switch (MI.getOpcode()) { ++ default: ++ llvm_unreachable("Unexpected opcode"); ++ case LoongArch::PseudoVBZ: ++ CondOpc = LoongArch::VSETEQZ_V; ++ break; ++ case LoongArch::PseudoVBZ_B: ++ CondOpc = LoongArch::VSETANYEQZ_B; ++ break; ++ case LoongArch::PseudoVBZ_H: ++ CondOpc = LoongArch::VSETANYEQZ_H; ++ break; ++ case LoongArch::PseudoVBZ_W: ++ CondOpc = LoongArch::VSETANYEQZ_W; ++ break; ++ case LoongArch::PseudoVBZ_D: ++ CondOpc = LoongArch::VSETANYEQZ_D; ++ break; ++ case LoongArch::PseudoVBNZ: ++ CondOpc = LoongArch::VSETNEZ_V; ++ break; ++ case LoongArch::PseudoVBNZ_B: ++ CondOpc = LoongArch::VSETALLNEZ_B; ++ break; ++ case LoongArch::PseudoVBNZ_H: ++ CondOpc = LoongArch::VSETALLNEZ_H; ++ break; ++ case LoongArch::PseudoVBNZ_W: ++ CondOpc = LoongArch::VSETALLNEZ_W; ++ break; ++ case LoongArch::PseudoVBNZ_D: ++ CondOpc = LoongArch::VSETALLNEZ_D; ++ break; ++ } ++ ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ const BasicBlock *LLVM_BB = BB->getBasicBlock(); ++ DebugLoc DL = MI.getDebugLoc(); ++ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); ++ MachineFunction::iterator It = ++BB->getIterator(); ++ ++ MachineFunction *F = BB->getParent(); ++ MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB); ++ ++ F->insert(It, FalseBB); ++ F->insert(It, TrueBB); ++ F->insert(It, SinkBB); ++ ++ // Transfer the remainder of MBB and its successor edges to Sink. ++ SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end()); ++ SinkBB->transferSuccessorsAndUpdatePHIs(BB); ++ ++ // Insert the real instruction to BB. ++ Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass); ++ BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg()); ++ ++ // Insert branch. 
++ BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB); ++ BB->addSuccessor(FalseBB); ++ BB->addSuccessor(TrueBB); ++ ++ // FalseBB. ++ Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass); ++ BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1) ++ .addReg(LoongArch::R0) ++ .addImm(0); ++ BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB); ++ FalseBB->addSuccessor(SinkBB); ++ ++ // TrueBB. ++ Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass); ++ BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2) ++ .addReg(LoongArch::R0) ++ .addImm(1); ++ TrueBB->addSuccessor(SinkBB); ++ ++ // SinkBB: merge the results. ++ BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI), ++ MI.getOperand(0).getReg()) ++ .addReg(RD1) ++ .addMBB(FalseBB) ++ .addReg(RD2) ++ .addMBB(TrueBB); ++ ++ // The pseudo instruction is gone now. ++ MI.eraseFromParent(); ++ return SinkBB; ++} ++ + MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( + MachineInstr &MI, MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); +@@ -1786,6 +2626,17 @@ MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( + MI.eraseFromParent(); + return BB; + } ++ case LoongArch::PseudoVBZ: ++ case LoongArch::PseudoVBZ_B: ++ case LoongArch::PseudoVBZ_H: ++ case LoongArch::PseudoVBZ_W: ++ case LoongArch::PseudoVBZ_D: ++ case LoongArch::PseudoVBNZ: ++ case LoongArch::PseudoVBNZ_B: ++ case LoongArch::PseudoVBNZ_H: ++ case LoongArch::PseudoVBNZ_W: ++ case LoongArch::PseudoVBNZ_D: ++ return emitVecCondBranchPseudo(MI, BB, Subtarget); + } + } + +@@ -1858,6 +2709,13 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { + NODE_NAME_CASE(MOVFCSR2GR) + NODE_NAME_CASE(CACOP_D) + NODE_NAME_CASE(CACOP_W) ++ NODE_NAME_CASE(VPICK_SEXT_ELT) ++ NODE_NAME_CASE(VPICK_ZEXT_ELT) ++ NODE_NAME_CASE(VREPLVE) ++ NODE_NAME_CASE(VALL_ZERO) ++ NODE_NAME_CASE(VANY_ZERO) ++ NODE_NAME_CASE(VALL_NONZERO) ++ NODE_NAME_CASE(VANY_NONZERO) + } + #undef NODE_NAME_CASE + return nullptr; +@@ -1884,6 +2742,10 @@ const MCPhysReg ArgFPR64s[] = { + LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64, + LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64}; + ++const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2, ++ LoongArch::VR3, LoongArch::VR4, LoongArch::VR5, ++ LoongArch::VR6, LoongArch::VR7}; ++ + // Pass a 2*GRLen argument that has been split into two GRLen values through + // registers or the stack as necessary. 
+ static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
+@@ -2030,6 +2892,8 @@ static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
+     Reg = State.AllocateReg(ArgFPR32s);
+   else if (ValVT == MVT::f64 && !UseGPRForFloat)
+     Reg = State.AllocateReg(ArgFPR64s);
++  else if (ValVT.is128BitVector())
++    Reg = State.AllocateReg(ArgVRs);
+   else
+     Reg = State.AllocateReg(ArgGPRs);
+
+diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+index 500407493fe5..7765057ebffb 100644
+--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+@@ -110,6 +110,20 @@ enum NodeType : unsigned {
+
+   // Read CPU configuration information operation
+   CPUCFG,
++
++  // Vector Shuffle
++  VREPLVE,
++
++  // Extended vector element extraction
++  VPICK_SEXT_ELT,
++  VPICK_ZEXT_ELT,
++
++  // Vector comparisons
++  VALL_ZERO,
++  VANY_ZERO,
++  VALL_NONZERO,
++  VANY_NONZERO,
++
+   // Intrinsic operations end =============================================
+ };
+ } // end namespace LoongArchISD
+diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
+index ef79b8a0dcd3..a5d66ebac96a 100644
+--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
+@@ -47,6 +47,14 @@ void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+     return;
+   }
+
++  // VR->VR copies.
++  if (LoongArch::LSX128RegClass.contains(DstReg, SrcReg)) {
++    BuildMI(MBB, MBBI, DL, get(LoongArch::VORI_B), DstReg)
++        .addReg(SrcReg, getKillRegState(KillSrc))
++        .addImm(0);
++    return;
++  }
++
+   // GPR->CFR copy.
+   if (LoongArch::CFRRegClass.contains(DstReg) &&
+       LoongArch::GPRRegClass.contains(SrcReg)) {
+@@ -99,6 +107,8 @@ void LoongArchInstrInfo::storeRegToStackSlot(
+     Opcode = LoongArch::FST_S;
+   else if (LoongArch::FPR64RegClass.hasSubClassEq(RC))
+     Opcode = LoongArch::FST_D;
++  else if (LoongArch::LSX128RegClass.hasSubClassEq(RC))
++    Opcode = LoongArch::VST;
+   else if (LoongArch::CFRRegClass.hasSubClassEq(RC))
+     Opcode = LoongArch::PseudoST_CFR;
+   else
+@@ -133,6 +143,8 @@ void LoongArchInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+     Opcode = LoongArch::FLD_S;
+   else if (LoongArch::FPR64RegClass.hasSubClassEq(RC))
+     Opcode = LoongArch::FLD_D;
++  else if (LoongArch::LSX128RegClass.hasSubClassEq(RC))
++    Opcode = LoongArch::VLD;
+   else if (LoongArch::CFRRegClass.hasSubClassEq(RC))
+     Opcode = LoongArch::PseudoLD_CFR;
+   else
+diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+index ac391ef471b1..b2c4bb812ba5 100644
+--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+@@ -182,7 +182,7 @@ def imm32 : Operand<GRLenVT> {
+   let ParserMatchClass = ImmAsmOperand<"", 32, "">;
+ }
+
+-def uimm1 : Operand<GRLenVT> {
++def uimm1 : Operand<GRLenVT>, ImmLeaf<GRLenVT, [{return isUInt<1>(Imm);}]>{
+   let ParserMatchClass = UImmAsmOperand<1>;
+ }
+
+@@ -197,11 +197,11 @@ def uimm2_plus1 : Operand<GRLenVT>,
+   let DecoderMethod = "decodeUImmOperand<2, 1>";
+ }
+
+-def uimm3 : Operand<GRLenVT> {
++def uimm3 : Operand<GRLenVT>, ImmLeaf<GRLenVT, [{return isUInt<3>(Imm);}]> {
+   let ParserMatchClass = UImmAsmOperand<3>;
+ }
+
+-def uimm4 : Operand<GRLenVT> {
++def uimm4 : Operand<GRLenVT>, ImmLeaf<GRLenVT, [{return isUInt<4>(Imm);}]> {
+   let ParserMatchClass = UImmAsmOperand<4>;
+ }
+
+diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+index a8ed285a37cf..13332be0bc38 100644
+---
a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -10,6 +10,146 @@ + // + //===----------------------------------------------------------------------===// + ++def SDT_LoongArchVreplve : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>, ++ SDTCisInt<1>, SDTCisVec<1>, ++ SDTCisSameAs<0, 1>, SDTCisInt<2>]>; ++def SDT_LoongArchVecCond : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>; ++ ++// Target nodes. ++def loongarch_vreplve : SDNode<"LoongArchISD::VREPLVE", SDT_LoongArchVreplve>; ++def loongarch_vall_nonzero : SDNode<"LoongArchISD::VALL_NONZERO", ++ SDT_LoongArchVecCond>; ++def loongarch_vany_nonzero : SDNode<"LoongArchISD::VANY_NONZERO", ++ SDT_LoongArchVecCond>; ++def loongarch_vall_zero : SDNode<"LoongArchISD::VALL_ZERO", ++ SDT_LoongArchVecCond>; ++def loongarch_vany_zero : SDNode<"LoongArchISD::VANY_ZERO", ++ SDT_LoongArchVecCond>; ++ ++def loongarch_vpick_sext_elt : SDNode<"LoongArchISD::VPICK_SEXT_ELT", ++ SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>>; ++def loongarch_vpick_zext_elt : SDNode<"LoongArchISD::VPICK_ZEXT_ELT", ++ SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>>; ++ ++class VecCond ++ : Pseudo<(outs GPR:$rd), (ins RC:$vj), ++ [(set GPR:$rd, (OpNode (TyNode RC:$vj)))]> { ++ let hasSideEffects = 0; ++ let mayLoad = 0; ++ let mayStore = 0; ++ let usesCustomInserter = 1; ++} ++ ++def vsplat_imm_eq_1 : PatFrags<(ops), [(build_vector), ++ (bitconvert (v4i32 (build_vector)))], [{ ++ APInt Imm; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0).getNode(); ++ ++ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && ++ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1; ++}]>; ++ ++def vsplati8_imm_eq_7 : PatFrags<(ops), [(build_vector)], [{ ++ APInt Imm; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0).getNode(); ++ ++ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && ++ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 7; ++}]>; ++def vsplati16_imm_eq_15 : PatFrags<(ops), [(build_vector)], [{ ++ APInt Imm; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0).getNode(); ++ ++ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && ++ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 15; ++}]>; ++def vsplati32_imm_eq_31 : PatFrags<(ops), [(build_vector)], [{ ++ APInt Imm; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0).getNode(); ++ ++ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && ++ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 31; ++}]>; ++def vsplati64_imm_eq_63 : PatFrags<(ops), [(build_vector), ++ (bitconvert (v4i32 (build_vector)))], [{ ++ APInt Imm; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0).getNode(); ++ ++ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && ++ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 63; ++}]>; ++ ++def vsplati8imm7 : PatFrag<(ops node:$reg), ++ (and node:$reg, vsplati8_imm_eq_7)>; ++def vsplati16imm15 : PatFrag<(ops node:$reg), ++ (and node:$reg, vsplati16_imm_eq_15)>; ++def vsplati32imm31 : PatFrag<(ops node:$reg), ++ (and node:$reg, vsplati32_imm_eq_31)>; ++def vsplati64imm63 : PatFrag<(ops node:$reg), ++ (and node:$reg, vsplati64_imm_eq_63)>; ++ ++foreach N = [3, 4, 5, 6, 8] in ++ def 
SplatPat_uimm#N : ComplexPattern", ++ [build_vector, bitconvert], [], 2>; ++ ++foreach N = [5] in ++ def SplatPat_simm#N : ComplexPattern", ++ [build_vector, bitconvert]>; ++ ++def vsplat_uimm_inv_pow2 : ComplexPattern; ++ ++def vsplat_uimm_pow2 : ComplexPattern; ++ ++def muladd : PatFrag<(ops node:$vd, node:$vj, node:$vk), ++ (add node:$vd, (mul node:$vj, node:$vk))>; ++ ++def mulsub : PatFrag<(ops node:$vd, node:$vj, node:$vk), ++ (sub node:$vd, (mul node:$vj, node:$vk))>; ++ ++def lsxsplati8 : PatFrag<(ops node:$e0), ++ (v16i8 (build_vector node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0))>; ++def lsxsplati16 : PatFrag<(ops node:$e0), ++ (v8i16 (build_vector node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0))>; ++def lsxsplati32 : PatFrag<(ops node:$e0), ++ (v4i32 (build_vector node:$e0, node:$e0, ++ node:$e0, node:$e0))>; ++ ++def lsxsplati64 : PatFrag<(ops node:$e0), ++ (v2i64 (build_vector node:$e0, node:$e0))>; ++ ++def to_valide_timm : SDNodeXForm(N); ++ return CurDAG->getTargetConstant(CN->getSExtValue(), SDLoc(N), Subtarget->getGRLenVT()); ++}]>; ++ + //===----------------------------------------------------------------------===// + // Instruction class templates + //===----------------------------------------------------------------------===// +@@ -1004,4 +1144,680 @@ def PseudoVREPLI_D : Pseudo<(outs LSX128:$vd), (ins simm10:$imm), [], + "vrepli.d", "$vd, $imm">; + } + ++def PseudoVBNZ_B : VecCond; ++def PseudoVBNZ_H : VecCond; ++def PseudoVBNZ_W : VecCond; ++def PseudoVBNZ_D : VecCond; ++def PseudoVBNZ : VecCond; ++ ++def PseudoVBZ_B : VecCond; ++def PseudoVBZ_H : VecCond; ++def PseudoVBZ_W : VecCond; ++def PseudoVBZ_D : VecCond; ++def PseudoVBZ : VecCond; ++ ++} // Predicates = [HasExtLSX] ++ ++multiclass PatVr { ++ def : Pat<(v16i8 (OpNode (v16i8 LSX128:$vj))), ++ (!cast(Inst#"_B") LSX128:$vj)>; ++ def : Pat<(v8i16 (OpNode (v8i16 LSX128:$vj))), ++ (!cast(Inst#"_H") LSX128:$vj)>; ++ def : Pat<(v4i32 (OpNode (v4i32 LSX128:$vj))), ++ (!cast(Inst#"_W") LSX128:$vj)>; ++ def : Pat<(v2i64 (OpNode (v2i64 LSX128:$vj))), ++ (!cast(Inst#"_D") LSX128:$vj)>; ++} ++ ++multiclass PatVrVr { ++ def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), ++ (!cast(Inst#"_B") LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), ++ (!cast(Inst#"_H") LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)), ++ (!cast(Inst#"_W") LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)), ++ (!cast(Inst#"_D") LSX128:$vj, LSX128:$vk)>; ++} ++ ++multiclass PatVrVrF { ++ def : Pat<(OpNode (v4f32 LSX128:$vj), (v4f32 LSX128:$vk)), ++ (!cast(Inst#"_S") LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(OpNode (v2f64 LSX128:$vj), (v2f64 LSX128:$vk)), ++ (!cast(Inst#"_D") LSX128:$vj, LSX128:$vk)>; ++} ++ ++multiclass PatVrVrU { ++ def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), ++ (!cast(Inst#"_BU") LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), ++ (!cast(Inst#"_HU") LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)), ++ (!cast(Inst#"_WU") LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)), ++ (!cast(Inst#"_DU") LSX128:$vj, LSX128:$vk)>; ++} ++ ++multiclass PatVrSimm5 { ++ def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 (SplatPat_simm5 
simm5:$imm))), ++ (!cast(Inst#"_B") LSX128:$vj, simm5:$imm)>; ++ def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 (SplatPat_simm5 simm5:$imm))), ++ (!cast(Inst#"_H") LSX128:$vj, simm5:$imm)>; ++ def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 (SplatPat_simm5 simm5:$imm))), ++ (!cast(Inst#"_W") LSX128:$vj, simm5:$imm)>; ++ def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 (SplatPat_simm5 simm5:$imm))), ++ (!cast(Inst#"_D") LSX128:$vj, simm5:$imm)>; ++} ++ ++multiclass PatVrUimm5 { ++ def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm5 uimm5:$imm))), ++ (!cast(Inst#"_BU") LSX128:$vj, uimm5:$imm)>; ++ def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 (SplatPat_uimm5 uimm5:$imm))), ++ (!cast(Inst#"_HU") LSX128:$vj, uimm5:$imm)>; ++ def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 (SplatPat_uimm5 uimm5:$imm))), ++ (!cast(Inst#"_WU") LSX128:$vj, uimm5:$imm)>; ++ def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 (SplatPat_uimm5 uimm5:$imm))), ++ (!cast(Inst#"_DU") LSX128:$vj, uimm5:$imm)>; ++} ++ ++multiclass PatVrVrVr { ++ def : Pat<(OpNode (v16i8 LSX128:$vd), (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), ++ (!cast(Inst#"_B") LSX128:$vd, LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(OpNode (v8i16 LSX128:$vd), (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), ++ (!cast(Inst#"_H") LSX128:$vd, LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(OpNode (v4i32 LSX128:$vd), (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)), ++ (!cast(Inst#"_W") LSX128:$vd, LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(OpNode (v2i64 LSX128:$vd), (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)), ++ (!cast(Inst#"_D") LSX128:$vd, LSX128:$vj, LSX128:$vk)>; ++} ++ ++multiclass PatShiftVrVr { ++ def : Pat<(OpNode (v16i8 LSX128:$vj), (and vsplati8_imm_eq_7, ++ (v16i8 LSX128:$vk))), ++ (!cast(Inst#"_B") LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(OpNode (v8i16 LSX128:$vj), (and vsplati16_imm_eq_15, ++ (v8i16 LSX128:$vk))), ++ (!cast(Inst#"_H") LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(OpNode (v4i32 LSX128:$vj), (and vsplati32_imm_eq_31, ++ (v4i32 LSX128:$vk))), ++ (!cast(Inst#"_W") LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(OpNode (v2i64 LSX128:$vj), (and vsplati64_imm_eq_63, ++ (v2i64 LSX128:$vk))), ++ (!cast(Inst#"_D") LSX128:$vj, LSX128:$vk)>; ++} ++ ++multiclass PatShiftVrUimm { ++ def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm3 uimm3:$imm))), ++ (!cast(Inst#"_B") LSX128:$vj, uimm3:$imm)>; ++ def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 (SplatPat_uimm4 uimm4:$imm))), ++ (!cast(Inst#"_H") LSX128:$vj, uimm4:$imm)>; ++ def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 (SplatPat_uimm5 uimm5:$imm))), ++ (!cast(Inst#"_W") LSX128:$vj, uimm5:$imm)>; ++ def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 (SplatPat_uimm6 uimm6:$imm))), ++ (!cast(Inst#"_D") LSX128:$vj, uimm6:$imm)>; ++} ++ ++class PatVrVrB ++ : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), ++ (Inst LSX128:$vj, LSX128:$vk)>; ++ ++let Predicates = [HasExtLSX] in { ++ ++// VADD_{B/H/W/D} ++defm : PatVrVr; ++// VSUB_{B/H/W/D} ++defm : PatVrVr; ++ ++// VADDI_{B/H/W/D}U ++defm : PatVrUimm5; ++// VSUBI_{B/H/W/D}U ++defm : PatVrUimm5; ++ ++// VNEG_{B/H/W/D} ++def : Pat<(sub immAllZerosV, (v16i8 LSX128:$vj)), (VNEG_B LSX128:$vj)>; ++def : Pat<(sub immAllZerosV, (v8i16 LSX128:$vj)), (VNEG_H LSX128:$vj)>; ++def : Pat<(sub immAllZerosV, (v4i32 LSX128:$vj)), (VNEG_W LSX128:$vj)>; ++def : Pat<(sub immAllZerosV, (v2i64 LSX128:$vj)), (VNEG_D LSX128:$vj)>; ++ ++// VMAX[I]_{B/H/W/D}[U] ++defm : PatVrVr; ++defm : PatVrVrU; ++defm : PatVrSimm5; ++defm : PatVrUimm5; ++ ++// VMIN[I]_{B/H/W/D}[U] ++defm : PatVrVr; ++defm : PatVrVrU; ++defm : PatVrSimm5; ++defm : 
PatVrUimm5; ++ ++// VMUL_{B/H/W/D} ++defm : PatVrVr; ++ ++// VMADD_{B/H/W/D} ++defm : PatVrVrVr; ++// VMSUB_{B/H/W/D} ++defm : PatVrVrVr; ++ ++// VDIV_{B/H/W/D}[U] ++defm : PatVrVr; ++defm : PatVrVrU; ++ ++// VMOD_{B/H/W/D}[U] ++defm : PatVrVr; ++defm : PatVrVrU; ++ ++// VAND_V ++def : PatVrVrB; ++// VNOR_V ++def : PatVrVrB; ++// VXOR_V ++def : PatVrVrB; ++// VNOR_V ++def : Pat<(vnot (or (v16i8 LSX128:$vj), (v16i8 LSX128:$vk))), ++ (VNOR_V LSX128:$vj, LSX128:$vk)>; ++ ++// VANDI_B ++def : Pat<(and (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm8 uimm8:$imm))), ++ (VANDI_B LSX128:$vj, uimm8:$imm)>; ++// VORI_B ++def : Pat<(or (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm8 uimm8:$imm))), ++ (VORI_B LSX128:$vj, uimm8:$imm)>; ++ ++// VXORI_B ++def : Pat<(xor (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm8 uimm8:$imm))), ++ (VXORI_B LSX128:$vj, uimm8:$imm)>; ++ ++// VSLL[I]_{B/H/W/D} ++defm : PatVrVr; ++defm : PatShiftVrVr; ++defm : PatShiftVrUimm; ++ ++// VSRL[I]_{B/H/W/D} ++defm : PatVrVr; ++defm : PatShiftVrVr; ++defm : PatShiftVrUimm; ++ ++// VSRA[I]_{B/H/W/D} ++defm : PatVrVr; ++defm : PatShiftVrVr; ++defm : PatShiftVrUimm; ++ ++// VPCNT_{B/H/W/D} ++defm : PatVr; ++ ++// VBITCLR_{B/H/W/D} ++def : Pat<(and v16i8:$vj, (vnot (shl vsplat_imm_eq_1, v16i8:$vk))), ++ (v16i8 (VBITCLR_B v16i8:$vj, v16i8:$vk))>; ++def : Pat<(and v8i16:$vj, (vnot (shl vsplat_imm_eq_1, v8i16:$vk))), ++ (v8i16 (VBITCLR_H v8i16:$vj, v8i16:$vk))>; ++def : Pat<(and v4i32:$vj, (vnot (shl vsplat_imm_eq_1, v4i32:$vk))), ++ (v4i32 (VBITCLR_W v4i32:$vj, v4i32:$vk))>; ++def : Pat<(and v2i64:$vj, (vnot (shl vsplat_imm_eq_1, v2i64:$vk))), ++ (v2i64 (VBITCLR_D v2i64:$vj, v2i64:$vk))>; ++def : Pat<(and v16i8:$vj, (vnot (shl vsplat_imm_eq_1, ++ (vsplati8imm7 v16i8:$vk)))), ++ (v16i8 (VBITCLR_B v16i8:$vj, v16i8:$vk))>; ++def : Pat<(and v8i16:$vj, (vnot (shl vsplat_imm_eq_1, ++ (vsplati16imm15 v8i16:$vk)))), ++ (v8i16 (VBITCLR_H v8i16:$vj, v8i16:$vk))>; ++def : Pat<(and v4i32:$vj, (vnot (shl vsplat_imm_eq_1, ++ (vsplati32imm31 v4i32:$vk)))), ++ (v4i32 (VBITCLR_W v4i32:$vj, v4i32:$vk))>; ++def : Pat<(and v2i64:$vj, (vnot (shl vsplat_imm_eq_1, ++ (vsplati64imm63 v2i64:$vk)))), ++ (v2i64 (VBITCLR_D v2i64:$vj, v2i64:$vk))>; ++ ++// VBITCLRI_{B/H/W/D} ++def : Pat<(and (v16i8 LSX128:$vj), (v16i8 (vsplat_uimm_inv_pow2 uimm3:$imm))), ++ (VBITCLRI_B LSX128:$vj, uimm3:$imm)>; ++def : Pat<(and (v8i16 LSX128:$vj), (v8i16 (vsplat_uimm_inv_pow2 uimm4:$imm))), ++ (VBITCLRI_H LSX128:$vj, uimm4:$imm)>; ++def : Pat<(and (v4i32 LSX128:$vj), (v4i32 (vsplat_uimm_inv_pow2 uimm5:$imm))), ++ (VBITCLRI_W LSX128:$vj, uimm5:$imm)>; ++def : Pat<(and (v2i64 LSX128:$vj), (v2i64 (vsplat_uimm_inv_pow2 uimm6:$imm))), ++ (VBITCLRI_D LSX128:$vj, uimm6:$imm)>; ++ ++// VBITSET_{B/H/W/D} ++def : Pat<(or v16i8:$vj, (shl vsplat_imm_eq_1, v16i8:$vk)), ++ (v16i8 (VBITSET_B v16i8:$vj, v16i8:$vk))>; ++def : Pat<(or v8i16:$vj, (shl vsplat_imm_eq_1, v8i16:$vk)), ++ (v8i16 (VBITSET_H v8i16:$vj, v8i16:$vk))>; ++def : Pat<(or v4i32:$vj, (shl vsplat_imm_eq_1, v4i32:$vk)), ++ (v4i32 (VBITSET_W v4i32:$vj, v4i32:$vk))>; ++def : Pat<(or v2i64:$vj, (shl vsplat_imm_eq_1, v2i64:$vk)), ++ (v2i64 (VBITSET_D v2i64:$vj, v2i64:$vk))>; ++def : Pat<(or v16i8:$vj, (shl vsplat_imm_eq_1, (vsplati8imm7 v16i8:$vk))), ++ (v16i8 (VBITSET_B v16i8:$vj, v16i8:$vk))>; ++def : Pat<(or v8i16:$vj, (shl vsplat_imm_eq_1, (vsplati16imm15 v8i16:$vk))), ++ (v8i16 (VBITSET_H v8i16:$vj, v8i16:$vk))>; ++def : Pat<(or v4i32:$vj, (shl vsplat_imm_eq_1, (vsplati32imm31 v4i32:$vk))), ++ (v4i32 (VBITSET_W v4i32:$vj, v4i32:$vk))>; ++def : 
Pat<(or v2i64:$vj, (shl vsplat_imm_eq_1, (vsplati64imm63 v2i64:$vk))), ++ (v2i64 (VBITSET_D v2i64:$vj, v2i64:$vk))>; ++ ++// VBITSETI_{B/H/W/D} ++def : Pat<(or (v16i8 LSX128:$vj), (v16i8 (vsplat_uimm_pow2 uimm3:$imm))), ++ (VBITSETI_B LSX128:$vj, uimm3:$imm)>; ++def : Pat<(or (v8i16 LSX128:$vj), (v8i16 (vsplat_uimm_pow2 uimm4:$imm))), ++ (VBITSETI_H LSX128:$vj, uimm4:$imm)>; ++def : Pat<(or (v4i32 LSX128:$vj), (v4i32 (vsplat_uimm_pow2 uimm5:$imm))), ++ (VBITSETI_W LSX128:$vj, uimm5:$imm)>; ++def : Pat<(or (v2i64 LSX128:$vj), (v2i64 (vsplat_uimm_pow2 uimm6:$imm))), ++ (VBITSETI_D LSX128:$vj, uimm6:$imm)>; ++ ++// VBITREV_{B/H/W/D} ++def : Pat<(xor v16i8:$vj, (shl vsplat_imm_eq_1, v16i8:$vk)), ++ (v16i8 (VBITREV_B v16i8:$vj, v16i8:$vk))>; ++def : Pat<(xor v8i16:$vj, (shl vsplat_imm_eq_1, v8i16:$vk)), ++ (v8i16 (VBITREV_H v8i16:$vj, v8i16:$vk))>; ++def : Pat<(xor v4i32:$vj, (shl vsplat_imm_eq_1, v4i32:$vk)), ++ (v4i32 (VBITREV_W v4i32:$vj, v4i32:$vk))>; ++def : Pat<(xor v2i64:$vj, (shl vsplat_imm_eq_1, v2i64:$vk)), ++ (v2i64 (VBITREV_D v2i64:$vj, v2i64:$vk))>; ++def : Pat<(xor v16i8:$vj, (shl vsplat_imm_eq_1, (vsplati8imm7 v16i8:$vk))), ++ (v16i8 (VBITREV_B v16i8:$vj, v16i8:$vk))>; ++def : Pat<(xor v8i16:$vj, (shl vsplat_imm_eq_1, (vsplati16imm15 v8i16:$vk))), ++ (v8i16 (VBITREV_H v8i16:$vj, v8i16:$vk))>; ++def : Pat<(xor v4i32:$vj, (shl vsplat_imm_eq_1, (vsplati32imm31 v4i32:$vk))), ++ (v4i32 (VBITREV_W v4i32:$vj, v4i32:$vk))>; ++def : Pat<(xor v2i64:$vj, (shl vsplat_imm_eq_1, (vsplati64imm63 v2i64:$vk))), ++ (v2i64 (VBITREV_D v2i64:$vj, v2i64:$vk))>; ++ ++// VBITREVI_{B/H/W/D} ++def : Pat<(xor (v16i8 LSX128:$vj), (v16i8 (vsplat_uimm_pow2 uimm3:$imm))), ++ (VBITREVI_B LSX128:$vj, uimm3:$imm)>; ++def : Pat<(xor (v8i16 LSX128:$vj), (v8i16 (vsplat_uimm_pow2 uimm4:$imm))), ++ (VBITREVI_H LSX128:$vj, uimm4:$imm)>; ++def : Pat<(xor (v4i32 LSX128:$vj), (v4i32 (vsplat_uimm_pow2 uimm5:$imm))), ++ (VBITREVI_W LSX128:$vj, uimm5:$imm)>; ++def : Pat<(xor (v2i64 LSX128:$vj), (v2i64 (vsplat_uimm_pow2 uimm6:$imm))), ++ (VBITREVI_D LSX128:$vj, uimm6:$imm)>; ++ ++// VFADD_{S/D} ++defm : PatVrVrF; ++ ++// VFSUB_{S/D} ++defm : PatVrVrF; ++ ++// VFMUL_{S/D} ++defm : PatVrVrF; ++ ++// VFDIV_{S/D} ++defm : PatVrVrF; ++ ++// VFMADD_{S/D} ++def : Pat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va), ++ (VFMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; ++def : Pat<(fma v2f64:$vj, v2f64:$vk, v2f64:$va), ++ (VFMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; ++ ++// VINSGR2VR_{B/H/W/D} ++def : Pat<(vector_insert v16i8:$vd, GRLenVT:$rj, uimm4:$imm), ++ (VINSGR2VR_B v16i8:$vd, GRLenVT:$rj, uimm4:$imm)>; ++def : Pat<(vector_insert v8i16:$vd, GRLenVT:$rj, uimm3:$imm), ++ (VINSGR2VR_H v8i16:$vd, GRLenVT:$rj, uimm3:$imm)>; ++def : Pat<(vector_insert v4i32:$vd, GRLenVT:$rj, uimm2:$imm), ++ (VINSGR2VR_W v4i32:$vd, GRLenVT:$rj, uimm2:$imm)>; ++def : Pat<(vector_insert v2i64:$vd, GRLenVT:$rj, uimm1:$imm), ++ (VINSGR2VR_D v2i64:$vd, GRLenVT:$rj, uimm1:$imm)>; ++ ++// VPICKVE2GR_{B/H/W}[U] ++def : Pat<(loongarch_vpick_sext_elt v16i8:$vd, uimm4:$imm, i8), ++ (VPICKVE2GR_B v16i8:$vd, uimm4:$imm)>; ++def : Pat<(loongarch_vpick_sext_elt v8i16:$vd, uimm3:$imm, i16), ++ (VPICKVE2GR_H v8i16:$vd, uimm3:$imm)>; ++def : Pat<(loongarch_vpick_sext_elt v4i32:$vd, uimm2:$imm, i32), ++ (VPICKVE2GR_W v4i32:$vd, uimm2:$imm)>; ++ ++def : Pat<(loongarch_vpick_zext_elt v16i8:$vd, uimm4:$imm, i8), ++ (VPICKVE2GR_BU v16i8:$vd, uimm4:$imm)>; ++def : Pat<(loongarch_vpick_zext_elt v8i16:$vd, uimm3:$imm, i16), ++ (VPICKVE2GR_HU v8i16:$vd, uimm3:$imm)>; ++def : 
Pat<(loongarch_vpick_zext_elt v4i32:$vd, uimm2:$imm, i32), ++ (VPICKVE2GR_WU v4i32:$vd, uimm2:$imm)>; ++ ++// VREPLGR2VR_{B/H/W/D} ++def : Pat<(lsxsplati8 GPR:$rj), (VREPLGR2VR_B GPR:$rj)>; ++def : Pat<(lsxsplati16 GPR:$rj), (VREPLGR2VR_H GPR:$rj)>; ++def : Pat<(lsxsplati32 GPR:$rj), (VREPLGR2VR_W GPR:$rj)>; ++def : Pat<(lsxsplati64 GPR:$rj), (VREPLGR2VR_D GPR:$rj)>; ++ ++// VREPLVE_{B/H/W/D} ++def : Pat<(loongarch_vreplve v16i8:$vj, GRLenVT:$rk), ++ (VREPLVE_B v16i8:$vj, GRLenVT:$rk)>; ++def : Pat<(loongarch_vreplve v8i16:$vj, GRLenVT:$rk), ++ (VREPLVE_H v8i16:$vj, GRLenVT:$rk)>; ++def : Pat<(loongarch_vreplve v4i32:$vj, GRLenVT:$rk), ++ (VREPLVE_W v4i32:$vj, GRLenVT:$rk)>; ++def : Pat<(loongarch_vreplve v2i64:$vj, GRLenVT:$rk), ++ (VREPLVE_D v2i64:$vj, GRLenVT:$rk)>; ++ ++// Loads/Stores ++foreach vt = [v16i8, v8i16, v4i32, v2i64] in { ++ defm : LdPat; ++ def : RegRegLdPat; ++ defm : StPat; ++ def : RegRegStPat; ++} ++ ++} // Predicates = [HasExtLSX] ++ ++/// Intrinsic pattern ++ ++class deriveLSXIntrinsic { ++ Intrinsic ret = !cast(!tolower("int_loongarch_lsx_"#Inst)); ++} ++ ++let Predicates = [HasExtLSX] in { ++ ++// vty: v16i8/v8i16/v4i32/v2i64 ++// Pat<(Intrinsic vty:$vj, vty:$vk), ++// (LAInst vty:$vj, vty:$vk)>; ++foreach Inst = ["VSADD_B", "VSADD_BU", "VSSUB_B", "VSSUB_BU", ++ "VHADDW_H_B", "VHADDW_HU_BU", "VHSUBW_H_B", "VHSUBW_HU_BU", ++ "VADDWEV_H_B", "VADDWOD_H_B", "VSUBWEV_H_B", "VSUBWOD_H_B", ++ "VADDWEV_H_BU", "VADDWOD_H_BU", "VSUBWEV_H_BU", "VSUBWOD_H_BU", ++ "VADDWEV_H_BU_B", "VADDWOD_H_BU_B", ++ "VAVG_B", "VAVG_BU", "VAVGR_B", "VAVGR_BU", ++ "VABSD_B", "VABSD_BU", "VADDA_B", "VMUH_B", "VMUH_BU", ++ "VMULWEV_H_B", "VMULWOD_H_B", "VMULWEV_H_BU", "VMULWOD_H_BU", ++ "VMULWEV_H_BU_B", "VMULWOD_H_BU_B", "VSIGNCOV_B", ++ "VANDN_V", "VORN_V", "VROTR_B", "VSRLR_B", "VSRAR_B", ++ "VSEQ_B", "VSLE_B", "VSLE_BU", "VSLT_B", "VSLT_BU", ++ "VPACKEV_B", "VPACKOD_B", "VPICKEV_B", "VPICKOD_B", ++ "VILVL_B", "VILVH_B"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), ++ (!cast(Inst) LSX128:$vj, LSX128:$vk)>; ++foreach Inst = ["VSADD_H", "VSADD_HU", "VSSUB_H", "VSSUB_HU", ++ "VHADDW_W_H", "VHADDW_WU_HU", "VHSUBW_W_H", "VHSUBW_WU_HU", ++ "VADDWEV_W_H", "VADDWOD_W_H", "VSUBWEV_W_H", "VSUBWOD_W_H", ++ "VADDWEV_W_HU", "VADDWOD_W_HU", "VSUBWEV_W_HU", "VSUBWOD_W_HU", ++ "VADDWEV_W_HU_H", "VADDWOD_W_HU_H", ++ "VAVG_H", "VAVG_HU", "VAVGR_H", "VAVGR_HU", ++ "VABSD_H", "VABSD_HU", "VADDA_H", "VMUH_H", "VMUH_HU", ++ "VMULWEV_W_H", "VMULWOD_W_H", "VMULWEV_W_HU", "VMULWOD_W_HU", ++ "VMULWEV_W_HU_H", "VMULWOD_W_HU_H", "VSIGNCOV_H", "VROTR_H", ++ "VSRLR_H", "VSRAR_H", "VSRLN_B_H", "VSRAN_B_H", "VSRLRN_B_H", ++ "VSRARN_B_H", "VSSRLN_B_H", "VSSRAN_B_H", "VSSRLN_BU_H", ++ "VSSRAN_BU_H", "VSSRLRN_B_H", "VSSRARN_B_H", "VSSRLRN_BU_H", ++ "VSSRARN_BU_H", ++ "VSEQ_H", "VSLE_H", "VSLE_HU", "VSLT_H", "VSLT_HU", ++ "VPACKEV_H", "VPACKOD_H", "VPICKEV_H", "VPICKOD_H", ++ "VILVL_H", "VILVH_H"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), ++ (!cast(Inst) LSX128:$vj, LSX128:$vk)>; ++foreach Inst = ["VSADD_W", "VSADD_WU", "VSSUB_W", "VSSUB_WU", ++ "VHADDW_D_W", "VHADDW_DU_WU", "VHSUBW_D_W", "VHSUBW_DU_WU", ++ "VADDWEV_D_W", "VADDWOD_D_W", "VSUBWEV_D_W", "VSUBWOD_D_W", ++ "VADDWEV_D_WU", "VADDWOD_D_WU", "VSUBWEV_D_WU", "VSUBWOD_D_WU", ++ "VADDWEV_D_WU_W", "VADDWOD_D_WU_W", ++ "VAVG_W", "VAVG_WU", "VAVGR_W", "VAVGR_WU", ++ "VABSD_W", "VABSD_WU", "VADDA_W", "VMUH_W", "VMUH_WU", ++ "VMULWEV_D_W", "VMULWOD_D_W", "VMULWEV_D_WU", "VMULWOD_D_WU", ++ 
"VMULWEV_D_WU_W", "VMULWOD_D_WU_W", "VSIGNCOV_W", "VROTR_W", ++ "VSRLR_W", "VSRAR_W", "VSRLN_H_W", "VSRAN_H_W", "VSRLRN_H_W", ++ "VSRARN_H_W", "VSSRLN_H_W", "VSSRAN_H_W", "VSSRLN_HU_W", ++ "VSSRAN_HU_W", "VSSRLRN_H_W", "VSSRARN_H_W", "VSSRLRN_HU_W", ++ "VSSRARN_HU_W", ++ "VSEQ_W", "VSLE_W", "VSLE_WU", "VSLT_W", "VSLT_WU", ++ "VPACKEV_W", "VPACKOD_W", "VPICKEV_W", "VPICKOD_W", ++ "VILVL_W", "VILVH_W"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)), ++ (!cast(Inst) LSX128:$vj, LSX128:$vk)>; ++foreach Inst = ["VADD_Q", "VSUB_Q", ++ "VSADD_D", "VSADD_DU", "VSSUB_D", "VSSUB_DU", ++ "VHADDW_Q_D", "VHADDW_QU_DU", "VHSUBW_Q_D", "VHSUBW_QU_DU", ++ "VADDWEV_Q_D", "VADDWOD_Q_D", "VSUBWEV_Q_D", "VSUBWOD_Q_D", ++ "VADDWEV_Q_DU", "VADDWOD_Q_DU", "VSUBWEV_Q_DU", "VSUBWOD_Q_DU", ++ "VADDWEV_Q_DU_D", "VADDWOD_Q_DU_D", ++ "VAVG_D", "VAVG_DU", "VAVGR_D", "VAVGR_DU", ++ "VABSD_D", "VABSD_DU", "VADDA_D", "VMUH_D", "VMUH_DU", ++ "VMULWEV_Q_D", "VMULWOD_Q_D", "VMULWEV_Q_DU", "VMULWOD_Q_DU", ++ "VMULWEV_Q_DU_D", "VMULWOD_Q_DU_D", "VSIGNCOV_D", "VROTR_D", ++ "VSRLR_D", "VSRAR_D", "VSRLN_W_D", "VSRAN_W_D", "VSRLRN_W_D", ++ "VSRARN_W_D", "VSSRLN_W_D", "VSSRAN_W_D", "VSSRLN_WU_D", ++ "VSSRAN_WU_D", "VSSRLRN_W_D", "VSSRARN_W_D", "VSSRLRN_WU_D", ++ "VSSRARN_WU_D", "VFFINT_S_L", ++ "VSEQ_D", "VSLE_D", "VSLE_DU", "VSLT_D", "VSLT_DU", ++ "VPACKEV_D", "VPACKOD_D", "VPICKEV_D", "VPICKOD_D", ++ "VILVL_D", "VILVH_D"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)), ++ (!cast(Inst) LSX128:$vj, LSX128:$vk)>; ++ ++// vty: v16i8/v8i16/v4i32/v2i64 ++// Pat<(Intrinsic vty:$vd, vty:$vj, vty:$vk), ++// (LAInst vty:$vd, vty:$vj, vty:$vk)>; ++foreach Inst = ["VMADDWEV_H_B", "VMADDWOD_H_B", "VMADDWEV_H_BU", ++ "VMADDWOD_H_BU", "VMADDWEV_H_BU_B", "VMADDWOD_H_BU_B"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v8i16 LSX128:$vd), (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), ++ (!cast(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>; ++foreach Inst = ["VMADDWEV_W_H", "VMADDWOD_W_H", "VMADDWEV_W_HU", ++ "VMADDWOD_W_HU", "VMADDWEV_W_HU_H", "VMADDWOD_W_HU_H"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v4i32 LSX128:$vd), (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), ++ (!cast(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>; ++foreach Inst = ["VMADDWEV_D_W", "VMADDWOD_D_W", "VMADDWEV_D_WU", ++ "VMADDWOD_D_WU", "VMADDWEV_D_WU_W", "VMADDWOD_D_WU_W"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v2i64 LSX128:$vd), (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)), ++ (!cast(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>; ++foreach Inst = ["VMADDWEV_Q_D", "VMADDWOD_Q_D", "VMADDWEV_Q_DU", ++ "VMADDWOD_Q_DU", "VMADDWEV_Q_DU_D", "VMADDWOD_Q_DU_D"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v2i64 LSX128:$vd), (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)), ++ (!cast(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>; ++ ++// vty: v16i8/v8i16/v4i32/v2i64 ++// Pat<(Intrinsic vty:$vj), ++// (LAInst vty:$vj)>; ++foreach Inst = ["VEXTH_H_B", "VEXTH_HU_BU", ++ "VMSKLTZ_B", "VMSKGEZ_B", "VMSKNZ_B", ++ "VCLO_B", "VCLZ_B"] in ++ def : Pat<(deriveLSXIntrinsic.ret (v16i8 LSX128:$vj)), ++ (!cast(Inst) LSX128:$vj)>; ++foreach Inst = ["VEXTH_W_H", "VEXTH_WU_HU", "VMSKLTZ_H", ++ "VCLO_H", "VCLZ_H", "VFCVTL_S_H", "VFCVTH_S_H"] in ++ def : Pat<(deriveLSXIntrinsic.ret (v8i16 LSX128:$vj)), ++ (!cast(Inst) LSX128:$vj)>; ++foreach Inst = ["VEXTH_D_W", "VEXTH_DU_WU", "VMSKLTZ_W", ++ "VCLO_W", "VCLZ_W", "VFFINT_S_W", "VFFINT_S_WU", ++ "VFFINTL_D_W", "VFFINTH_D_W"] in ++ def : Pat<(deriveLSXIntrinsic.ret (v4i32 LSX128:$vj)), ++ (!cast(Inst) LSX128:$vj)>; 
++foreach Inst = ["VEXTH_Q_D", "VEXTH_QU_DU", "VMSKLTZ_D", ++ "VEXTL_Q_D", "VEXTL_QU_DU", ++ "VCLO_D", "VCLZ_D", "VFFINT_D_L", "VFFINT_D_LU"] in ++ def : Pat<(deriveLSXIntrinsic.ret (v2i64 LSX128:$vj)), ++ (!cast(Inst) LSX128:$vj)>; ++ ++// Pat<(Intrinsic timm:$imm) ++// (LAInst timm:$imm)>; ++def : Pat<(int_loongarch_lsx_vldi timm:$imm), ++ (VLDI (to_valide_timm timm:$imm))>; ++foreach Inst = ["VREPLI_B", "VREPLI_H", "VREPLI_W", "VREPLI_D"] in ++ def : Pat<(deriveLSXIntrinsic.ret timm:$imm), ++ (!cast("Pseudo"#Inst) (to_valide_timm timm:$imm))>; ++ ++// vty: v16i8/v8i16/v4i32/v2i64 ++// Pat<(Intrinsic vty:$vj, timm:$imm) ++// (LAInst vty:$vj, timm:$imm)>; ++foreach Inst = ["VSAT_B", "VSAT_BU", "VNORI_B", "VROTRI_B", "VSLLWIL_H_B", ++ "VSLLWIL_HU_BU", "VSRLRI_B", "VSRARI_B", ++ "VSEQI_B", "VSLEI_B", "VSLEI_BU", "VSLTI_B", "VSLTI_BU", ++ "VREPLVEI_B", "VBSLL_V", "VBSRL_V", "VSHUF4I_B"] in ++ def : Pat<(deriveLSXIntrinsic.ret (v16i8 LSX128:$vj), timm:$imm), ++ (!cast(Inst) LSX128:$vj, (to_valide_timm timm:$imm))>; ++foreach Inst = ["VSAT_H", "VSAT_HU", "VROTRI_H", "VSLLWIL_W_H", ++ "VSLLWIL_WU_HU", "VSRLRI_H", "VSRARI_H", ++ "VSEQI_H", "VSLEI_H", "VSLEI_HU", "VSLTI_H", "VSLTI_HU", ++ "VREPLVEI_H", "VSHUF4I_H"] in ++ def : Pat<(deriveLSXIntrinsic.ret (v8i16 LSX128:$vj), timm:$imm), ++ (!cast(Inst) LSX128:$vj, (to_valide_timm timm:$imm))>; ++foreach Inst = ["VSAT_W", "VSAT_WU", "VROTRI_W", "VSLLWIL_D_W", ++ "VSLLWIL_DU_WU", "VSRLRI_W", "VSRARI_W", ++ "VSEQI_W", "VSLEI_W", "VSLEI_WU", "VSLTI_W", "VSLTI_WU", ++ "VREPLVEI_W", "VSHUF4I_W"] in ++ def : Pat<(deriveLSXIntrinsic.ret (v4i32 LSX128:$vj), timm:$imm), ++ (!cast(Inst) LSX128:$vj, (to_valide_timm timm:$imm))>; ++foreach Inst = ["VSAT_D", "VSAT_DU", "VROTRI_D", "VSRLRI_D", "VSRARI_D", ++ "VSEQI_D", "VSLEI_D", "VSLEI_DU", "VSLTI_D", "VSLTI_DU", ++ "VPICKVE2GR_D", "VPICKVE2GR_DU", ++ "VREPLVEI_D"] in ++ def : Pat<(deriveLSXIntrinsic.ret (v2i64 LSX128:$vj), timm:$imm), ++ (!cast(Inst) LSX128:$vj, (to_valide_timm timm:$imm))>; ++ ++// vty: v16i8/v8i16/v4i32/v2i64 ++// Pat<(Intrinsic vty:$vd, vty:$vj, timm:$imm) ++// (LAInst vty:$vd, vty:$vj, timm:$imm)>; ++foreach Inst = ["VSRLNI_B_H", "VSRANI_B_H", "VSRLRNI_B_H", "VSRARNI_B_H", ++ "VSSRLNI_B_H", "VSSRANI_B_H", "VSSRLNI_BU_H", "VSSRANI_BU_H", ++ "VSSRLRNI_B_H", "VSSRARNI_B_H", "VSSRLRNI_BU_H", "VSSRARNI_BU_H", ++ "VFRSTPI_B", "VBITSELI_B", "VEXTRINS_B"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v16i8 LSX128:$vd), (v16i8 LSX128:$vj), timm:$imm), ++ (!cast(Inst) LSX128:$vd, LSX128:$vj, ++ (to_valide_timm timm:$imm))>; ++foreach Inst = ["VSRLNI_H_W", "VSRANI_H_W", "VSRLRNI_H_W", "VSRARNI_H_W", ++ "VSSRLNI_H_W", "VSSRANI_H_W", "VSSRLNI_HU_W", "VSSRANI_HU_W", ++ "VSSRLRNI_H_W", "VSSRARNI_H_W", "VSSRLRNI_HU_W", "VSSRARNI_HU_W", ++ "VFRSTPI_H", "VEXTRINS_H"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v8i16 LSX128:$vd), (v8i16 LSX128:$vj), timm:$imm), ++ (!cast(Inst) LSX128:$vd, LSX128:$vj, ++ (to_valide_timm timm:$imm))>; ++foreach Inst = ["VSRLNI_W_D", "VSRANI_W_D", "VSRLRNI_W_D", "VSRARNI_W_D", ++ "VSSRLNI_W_D", "VSSRANI_W_D", "VSSRLNI_WU_D", "VSSRANI_WU_D", ++ "VSSRLRNI_W_D", "VSSRARNI_W_D", "VSSRLRNI_WU_D", "VSSRARNI_WU_D", ++ "VPERMI_W", "VEXTRINS_W"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v4i32 LSX128:$vd), (v4i32 LSX128:$vj), timm:$imm), ++ (!cast(Inst) LSX128:$vd, LSX128:$vj, ++ (to_valide_timm timm:$imm))>; ++foreach Inst = ["VSRLNI_D_Q", "VSRANI_D_Q", "VSRLRNI_D_Q", "VSRARNI_D_Q", ++ "VSSRLNI_D_Q", "VSSRANI_D_Q", "VSSRLNI_DU_Q", "VSSRANI_DU_Q", ++ "VSSRLRNI_D_Q", "VSSRARNI_D_Q", 
"VSSRLRNI_DU_Q", "VSSRARNI_DU_Q", ++ "VSHUF4I_D", "VEXTRINS_D"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v2i64 LSX128:$vd), (v2i64 LSX128:$vj), timm:$imm), ++ (!cast(Inst) LSX128:$vd, LSX128:$vj, ++ (to_valide_timm timm:$imm))>; ++ ++// vty: v16i8/v8i16/v4i32/v2i64 ++// Pat<(Intrinsic vty:$vd, vty:$vj, vty:$vk), ++// (LAInst vty:$vd, vty:$vj, vty:$vk)>; ++foreach Inst = ["VFRSTP_B", "VBITSEL_V", "VSHUF_B"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v16i8 LSX128:$vd), (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), ++ (!cast(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>; ++foreach Inst = ["VFRSTP_H", "VSHUF_H"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v8i16 LSX128:$vd), (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), ++ (!cast(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>; ++def : Pat<(int_loongarch_lsx_vshuf_w (v4i32 LSX128:$vd), (v4i32 LSX128:$vj), ++ (v4i32 LSX128:$vk)), ++ (VSHUF_W LSX128:$vd, LSX128:$vj, LSX128:$vk)>; ++def : Pat<(int_loongarch_lsx_vshuf_d (v2i64 LSX128:$vd), (v2i64 LSX128:$vj), ++ (v2i64 LSX128:$vk)), ++ (VSHUF_D LSX128:$vd, LSX128:$vj, LSX128:$vk)>; ++ ++// vty: v4f32/v2f64 ++// Pat<(Intrinsic vty:$vj, vty:$vk, vty:$va), ++// (LAInst vty:$vj, vty:$vk, vty:$va)>; ++foreach Inst = ["VFMSUB_S", "VFNMADD_S", "VFNMSUB_S"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v4f32 LSX128:$vj), (v4f32 LSX128:$vk), (v4f32 LSX128:$va)), ++ (!cast(Inst) LSX128:$vj, LSX128:$vk, LSX128:$va)>; ++foreach Inst = ["VFMSUB_D", "VFNMADD_D", "VFNMSUB_D"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v2f64 LSX128:$vj), (v2f64 LSX128:$vk), (v2f64 LSX128:$va)), ++ (!cast(Inst) LSX128:$vj, LSX128:$vk, LSX128:$va)>; ++ ++// vty: v4f32/v2f64 ++// Pat<(Intrinsic vty:$vj, vty:$vk), ++// (LAInst vty:$vj, vty:$vk)>; ++foreach Inst = ["VFMAX_S", "VFMIN_S", "VFMAXA_S", "VFMINA_S", "VFCVT_H_S", ++ "VFCMP_CAF_S", "VFCMP_CUN_S", "VFCMP_CEQ_S", "VFCMP_CUEQ_S", ++ "VFCMP_CLT_S", "VFCMP_CULT_S", "VFCMP_CLE_S", "VFCMP_CULE_S", ++ "VFCMP_CNE_S", "VFCMP_COR_S", "VFCMP_CUNE_S", ++ "VFCMP_SAF_S", "VFCMP_SUN_S", "VFCMP_SEQ_S", "VFCMP_SUEQ_S", ++ "VFCMP_SLT_S", "VFCMP_SULT_S", "VFCMP_SLE_S", "VFCMP_SULE_S", ++ "VFCMP_SNE_S", "VFCMP_SOR_S", "VFCMP_SUNE_S"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v4f32 LSX128:$vj), (v4f32 LSX128:$vk)), ++ (!cast(Inst) LSX128:$vj, LSX128:$vk)>; ++foreach Inst = ["VFMAX_D", "VFMIN_D", "VFMAXA_D", "VFMINA_D", "VFCVT_S_D", ++ "VFTINTRNE_W_D", "VFTINTRZ_W_D", "VFTINTRP_W_D", "VFTINTRM_W_D", ++ "VFTINT_W_D", ++ "VFCMP_CAF_D", "VFCMP_CUN_D", "VFCMP_CEQ_D", "VFCMP_CUEQ_D", ++ "VFCMP_CLT_D", "VFCMP_CULT_D", "VFCMP_CLE_D", "VFCMP_CULE_D", ++ "VFCMP_CNE_D", "VFCMP_COR_D", "VFCMP_CUNE_D", ++ "VFCMP_SAF_D", "VFCMP_SUN_D", "VFCMP_SEQ_D", "VFCMP_SUEQ_D", ++ "VFCMP_SLT_D", "VFCMP_SULT_D", "VFCMP_SLE_D", "VFCMP_SULE_D", ++ "VFCMP_SNE_D", "VFCMP_SOR_D", "VFCMP_SUNE_D"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v2f64 LSX128:$vj), (v2f64 LSX128:$vk)), ++ (!cast(Inst) LSX128:$vj, LSX128:$vk)>; ++ ++// vty: v4f32/v2f64 ++// Pat<(Intrinsic vty:$vj), ++// (LAInst vty:$vj)>; ++foreach Inst = ["VFLOGB_S", "VFCLASS_S", "VFSQRT_S", "VFRECIP_S", "VFRSQRT_S", ++ "VFRINT_S", "VFCVTL_D_S", "VFCVTH_D_S", ++ "VFRINTRNE_S", "VFRINTRZ_S", "VFRINTRP_S", "VFRINTRM_S", ++ "VFTINTRNE_W_S", "VFTINTRZ_W_S", "VFTINTRP_W_S", "VFTINTRM_W_S", ++ "VFTINT_W_S", "VFTINTRZ_WU_S", "VFTINT_WU_S", ++ "VFTINTRNEL_L_S", "VFTINTRNEH_L_S", "VFTINTRZL_L_S", ++ "VFTINTRZH_L_S", "VFTINTRPL_L_S", "VFTINTRPH_L_S", ++ "VFTINTRML_L_S", "VFTINTRMH_L_S", "VFTINTL_L_S", ++ "VFTINTH_L_S"] in ++ def : Pat<(deriveLSXIntrinsic.ret (v4f32 LSX128:$vj)), ++ 
(!cast(Inst) LSX128:$vj)>; ++foreach Inst = ["VFLOGB_D", "VFCLASS_D", "VFSQRT_D", "VFRECIP_D", "VFRSQRT_D", ++ "VFRINT_D", ++ "VFRINTRNE_D", "VFRINTRZ_D", "VFRINTRP_D", "VFRINTRM_D", ++ "VFTINTRNE_L_D", "VFTINTRZ_L_D", "VFTINTRP_L_D", "VFTINTRM_L_D", ++ "VFTINT_L_D", "VFTINTRZ_LU_D", "VFTINT_LU_D"] in ++ def : Pat<(deriveLSXIntrinsic.ret (v2f64 LSX128:$vj)), ++ (!cast(Inst) LSX128:$vj)>; ++ ++// load ++def : Pat<(int_loongarch_lsx_vld GPR:$rj, timm:$imm), ++ (VLD GPR:$rj, (to_valide_timm timm:$imm))>; ++def : Pat<(int_loongarch_lsx_vldx GPR:$rj, GPR:$rk), ++ (VLDX GPR:$rj, GPR:$rk)>; ++ ++def : Pat<(int_loongarch_lsx_vldrepl_b GPR:$rj, timm:$imm), ++ (VLDREPL_B GPR:$rj, (to_valide_timm timm:$imm))>; ++def : Pat<(int_loongarch_lsx_vldrepl_h GPR:$rj, timm:$imm), ++ (VLDREPL_H GPR:$rj, (to_valide_timm timm:$imm))>; ++def : Pat<(int_loongarch_lsx_vldrepl_w GPR:$rj, timm:$imm), ++ (VLDREPL_W GPR:$rj, (to_valide_timm timm:$imm))>; ++def : Pat<(int_loongarch_lsx_vldrepl_d GPR:$rj, timm:$imm), ++ (VLDREPL_D GPR:$rj, (to_valide_timm timm:$imm))>; ++ ++// store ++def : Pat<(int_loongarch_lsx_vst LSX128:$vd, GPR:$rj, timm:$imm), ++ (VST LSX128:$vd, GPR:$rj, (to_valide_timm timm:$imm))>; ++def : Pat<(int_loongarch_lsx_vstx LSX128:$vd, GPR:$rj, GPR:$rk), ++ (VSTX LSX128:$vd, GPR:$rj, GPR:$rk)>; ++ ++def : Pat<(int_loongarch_lsx_vstelm_b v16i8:$vd, GPR:$rj, timm:$imm, timm:$idx), ++ (VSTELM_B v16i8:$vd, GPR:$rj, (to_valide_timm timm:$imm), ++ (to_valide_timm timm:$idx))>; ++def : Pat<(int_loongarch_lsx_vstelm_h v8i16:$vd, GPR:$rj, timm:$imm, timm:$idx), ++ (VSTELM_H v8i16:$vd, GPR:$rj, (to_valide_timm timm:$imm), ++ (to_valide_timm timm:$idx))>; ++def : Pat<(int_loongarch_lsx_vstelm_w v4i32:$vd, GPR:$rj, timm:$imm, timm:$idx), ++ (VSTELM_W v4i32:$vd, GPR:$rj, (to_valide_timm timm:$imm), ++ (to_valide_timm timm:$idx))>; ++def : Pat<(int_loongarch_lsx_vstelm_d v2i64:$vd, GPR:$rj, timm:$imm, timm:$idx), ++ (VSTELM_D v2i64:$vd, GPR:$rj, (to_valide_timm timm:$imm), ++ (to_valide_timm timm:$idx))>; ++ + } // Predicates = [HasExtLSX] +-- +2.20.1 + diff --git a/0001-Backport-LoongArch-Add-relax-feature-and-keep-relocations.patch b/0002-LoongArch-Add-relax-feature-and-keep-relocations-721.patch similarity index 99% rename from 0001-Backport-LoongArch-Add-relax-feature-and-keep-relocations.patch rename to 0002-LoongArch-Add-relax-feature-and-keep-relocations-721.patch index 9602f6c..9b94ce1 100644 --- a/0001-Backport-LoongArch-Add-relax-feature-and-keep-relocations.patch +++ b/0002-LoongArch-Add-relax-feature-and-keep-relocations-721.patch @@ -9,7 +9,6 @@ relocation with a symbol rather than section plus offset, and keep all relocations with non-abs symbol. 
(cherry picked from commit f5bfc833fcbf17a5876911783d1adaca7028d20c) -Change-Id: Ief38b480016175f2cc9939b74a84d9444559ffd6 --- llvm/lib/Target/LoongArch/LoongArch.td | 4 +++ .../lib/Target/LoongArch/LoongArchSubtarget.h | 2 ++ diff --git a/0013-Backport-LoongArch-Improve-the-support-for-atomic-and-clear_cache.patch b/0002-LoongArch-Add-some-atomic-tests-68766.patch similarity index 62% rename from 0013-Backport-LoongArch-Improve-the-support-for-atomic-and-clear_cache.patch rename to 0002-LoongArch-Add-some-atomic-tests-68766.patch index 98f2654..c1c761d 100644 --- a/0013-Backport-LoongArch-Improve-the-support-for-atomic-and-clear_cache.patch +++ b/0002-LoongArch-Add-some-atomic-tests-68766.patch @@ -1,1458 +1,10 @@ -From ad367d826e5959792ce7384be62ba1ccffbf0d9a Mon Sep 17 00:00:00 2001 -From: hev -Date: Wed, 11 Oct 2023 10:24:18 +0800 -Subject: [PATCH 1/7] [LoongArch] Improve codegen for atomic ops (#67391) - -This PR improves memory barriers generated by atomic operations. - -Memory barrier semantics of LL/SC: -``` -LL: + -SC: + -``` - -Changes: -* Remove unnecessary memory barriers before LL and between LL/SC. -* Fix acquire semantics. (If the SC instruction is not executed, then -the guarantee of acquiring semantics cannot be ensured. Therefore, an -acquire barrier needs to be generated when memory ordering includes an -acquire operation.) - -(cherry picked from commit 203ba238e33c570dba6cbcf247f1668bb2a13c26) ---- - .../LoongArchExpandAtomicPseudoInsts.cpp | 50 +-- - .../Target/LoongArch/LoongArchInstrInfo.td | 24 +- - .../LoongArch/atomicrmw-uinc-udec-wrap.ll | 24 +- - .../ir-instruction/atomic-cmpxchg.ll | 376 ++++++++++++++++-- - .../LoongArch/ir-instruction/atomicrmw-fp.ll | 24 +- - .../ir-instruction/atomicrmw-minmax.ll | 24 -- - .../LoongArch/ir-instruction/atomicrmw.ll | 31 -- - llvm/unittests/Target/LoongArch/InstSizes.cpp | 2 +- - 8 files changed, 407 insertions(+), 148 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp -index 51df0463e235..eb78ef065b21 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp -@@ -153,18 +153,12 @@ static void doAtomicBinOpExpansion(const LoongArchInstrInfo *TII, - Register ScratchReg = MI.getOperand(1).getReg(); - Register AddrReg = MI.getOperand(2).getReg(); - Register IncrReg = MI.getOperand(3).getReg(); -- AtomicOrdering Ordering = -- static_cast(MI.getOperand(4).getImm()); - - // .loop: -- // if(Ordering != AtomicOrdering::Monotonic) -- // dbar 0 - // ll.[w|d] dest, (addr) - // binop scratch, dest, val - // sc.[w|d] scratch, scratch, (addr) - // beqz scratch, loop -- if (Ordering != AtomicOrdering::Monotonic) -- BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0); - BuildMI(LoopMBB, DL, - TII->get(Width == 32 ? 
LoongArch::LL_W : LoongArch::LL_D), DestReg) - .addReg(AddrReg) -@@ -251,12 +245,8 @@ static void doMaskedAtomicBinOpExpansion( - Register AddrReg = MI.getOperand(2).getReg(); - Register IncrReg = MI.getOperand(3).getReg(); - Register MaskReg = MI.getOperand(4).getReg(); -- AtomicOrdering Ordering = -- static_cast(MI.getOperand(5).getImm()); - - // .loop: -- // if(Ordering != AtomicOrdering::Monotonic) -- // dbar 0 - // ll.w destreg, (alignedaddr) - // binop scratch, destreg, incr - // xor scratch, destreg, scratch -@@ -264,8 +254,6 @@ static void doMaskedAtomicBinOpExpansion( - // xor scratch, destreg, scratch - // sc.w scratch, scratch, (alignedaddr) - // beqz scratch, loop -- if (Ordering != AtomicOrdering::Monotonic) -- BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0); - BuildMI(LoopMBB, DL, TII->get(LoongArch::LL_W), DestReg) - .addReg(AddrReg) - .addImm(0); -@@ -372,23 +360,20 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp( - auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); - auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); - auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); -- auto TailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); - auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); - - // Insert new MBBs. - MF->insert(++MBB.getIterator(), LoopHeadMBB); - MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB); - MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB); -- MF->insert(++LoopTailMBB->getIterator(), TailMBB); -- MF->insert(++TailMBB->getIterator(), DoneMBB); -+ MF->insert(++LoopTailMBB->getIterator(), DoneMBB); - - // Set up successors and transfer remaining instructions to DoneMBB. - LoopHeadMBB->addSuccessor(LoopIfBodyMBB); - LoopHeadMBB->addSuccessor(LoopTailMBB); - LoopIfBodyMBB->addSuccessor(LoopTailMBB); - LoopTailMBB->addSuccessor(LoopHeadMBB); -- LoopTailMBB->addSuccessor(TailMBB); -- TailMBB->addSuccessor(DoneMBB); -+ LoopTailMBB->addSuccessor(DoneMBB); - DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end()); - DoneMBB->transferSuccessors(&MBB); - MBB.addSuccessor(LoopHeadMBB); -@@ -402,11 +387,9 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp( - - // - // .loophead: -- // dbar 0 - // ll.w destreg, (alignedaddr) - // and scratch2, destreg, mask - // move scratch1, destreg -- BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::DBAR)).addImm(0); - BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::LL_W), DestReg) - .addReg(AddrReg) - .addImm(0); -@@ -463,7 +446,6 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp( - // .looptail: - // sc.w scratch1, scratch1, (addr) - // beqz scratch1, loop -- // dbar 0x700 - BuildMI(LoopTailMBB, DL, TII->get(LoongArch::SC_W), Scratch1Reg) - .addReg(Scratch1Reg) - .addReg(AddrReg) -@@ -472,10 +454,6 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp( - .addReg(Scratch1Reg) - .addMBB(LoopHeadMBB); - -- // .tail: -- // dbar 0x700 -- BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0x700); -- - NextMBBI = MBB.end(); - MI.eraseFromParent(); - -@@ -483,7 +461,6 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp( - computeAndAddLiveIns(LiveRegs, *LoopHeadMBB); - computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB); - computeAndAddLiveIns(LiveRegs, *LoopTailMBB); -- computeAndAddLiveIns(LiveRegs, *TailMBB); - computeAndAddLiveIns(LiveRegs, *DoneMBB); - - return true; -@@ -535,12 +512,10 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg( - .addReg(CmpValReg) - .addMBB(TailMBB); - // .looptail: -- // dbar 0 - // 
move scratch, newval - // sc.[w|d] scratch, scratch, (addr) - // beqz scratch, loophead - // b done -- BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0); - BuildMI(LoopTailMBB, DL, TII->get(LoongArch::OR), ScratchReg) - .addReg(NewValReg) - .addReg(LoongArch::R0); -@@ -573,13 +548,11 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg( - .addMBB(TailMBB); - - // .looptail: -- // dbar 0 - // andn scratch, dest, mask - // or scratch, scratch, newval - // sc.[w|d] scratch, scratch, (addr) - // beqz scratch, loophead - // b done -- BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0); - BuildMI(LoopTailMBB, DL, TII->get(LoongArch::ANDN), ScratchReg) - .addReg(DestReg) - .addReg(MaskReg); -@@ -598,9 +571,24 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg( - BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB); - } - -+ AtomicOrdering Ordering = -+ static_cast(MI.getOperand(IsMasked ? 6 : 5).getImm()); -+ int hint; -+ -+ switch (Ordering) { -+ case AtomicOrdering::Acquire: -+ case AtomicOrdering::AcquireRelease: -+ case AtomicOrdering::SequentiallyConsistent: -+ // TODO: acquire -+ hint = 0; -+ break; -+ default: -+ hint = 0x700; -+ } -+ - // .tail: -- // dbar 0x700 -- BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0x700); -+ // dbar 0x700 | acquire -+ BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(hint); - - NextMBBI = MBB.end(); - MI.eraseFromParent(); -diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -index 05ae36a9781d..a9b0db30c2f6 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -@@ -1731,7 +1731,7 @@ def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMMinMax; - - class PseudoCmpXchg - : Pseudo<(outs GPR:$res, GPR:$scratch), -- (ins GPR:$addr, GPR:$cmpval, GPR:$newval)> { -+ (ins GPR:$addr, GPR:$cmpval, GPR:$newval, grlenimm:$ordering)> { - let Constraints = "@earlyclobber $res,@earlyclobber $scratch"; - let mayLoad = 1; - let mayStore = 1; -@@ -1821,14 +1821,28 @@ def : AtomicPat; - --def : Pat<(atomic_cmp_swap_64 GPR:$addr, GPR:$cmp, GPR:$new), -- (PseudoCmpXchg64 GPR:$addr, GPR:$cmp, GPR:$new)>; -+// Ordering constants must be kept in sync with the AtomicOrdering enum in -+// AtomicOrdering.h. 
-+multiclass PseudoCmpXchgPat { -+ def : Pat<(vt (!cast(Op#"_monotonic") GPR:$addr, GPR:$cmp, GPR:$new)), -+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 2)>; -+ def : Pat<(vt (!cast(Op#"_acquire") GPR:$addr, GPR:$cmp, GPR:$new)), -+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 4)>; -+ def : Pat<(vt (!cast(Op#"_release") GPR:$addr, GPR:$cmp, GPR:$new)), -+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 5)>; -+ def : Pat<(vt (!cast(Op#"_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new)), -+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 6)>; -+ def : Pat<(vt (!cast(Op#"_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new)), -+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>; -+} -+ -+defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32>; -+defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64, i64>; - def : Pat<(int_loongarch_masked_cmpxchg_i64 - GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering), - (PseudoMaskedCmpXchg32 - GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>; --def : Pat<(atomic_cmp_swap_32 GPR:$addr, GPR:$cmp, GPR:$new), -- (PseudoCmpXchg32 GPR:$addr, GPR:$cmp, GPR:$new)>; - - def : PseudoMaskedAMMinMaxPat; -diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll -index f11af8fe6528..32106886c783 100644 ---- a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll -+++ b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll -@@ -34,14 +34,13 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { - ; LA64-NEXT: bne $a5, $a3, .LBB0_5 - ; LA64-NEXT: # %bb.4: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB0_3 Depth=2 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: move $a7, $a6 - ; LA64-NEXT: sc.w $a7, $a2, 0 - ; LA64-NEXT: beqz $a7, .LBB0_3 - ; LA64-NEXT: b .LBB0_6 - ; LA64-NEXT: .LBB0_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1 --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB0_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1 - ; LA64-NEXT: addi.w $a6, $a3, 0 -@@ -88,14 +87,13 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { - ; LA64-NEXT: bne $a5, $a3, .LBB1_5 - ; LA64-NEXT: # %bb.4: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB1_3 Depth=2 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: move $a7, $a6 - ; LA64-NEXT: sc.w $a7, $a2, 0 - ; LA64-NEXT: beqz $a7, .LBB1_3 - ; LA64-NEXT: b .LBB1_6 - ; LA64-NEXT: .LBB1_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1 --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB1_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1 - ; LA64-NEXT: addi.w $a6, $a3, 0 -@@ -129,14 +127,13 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { - ; LA64-NEXT: bne $a1, $a3, .LBB2_5 - ; LA64-NEXT: # %bb.4: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB2_3 Depth=2 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: move $a6, $a5 - ; LA64-NEXT: sc.w $a6, $a0, 0 - ; LA64-NEXT: beqz $a6, .LBB2_3 - ; LA64-NEXT: b .LBB2_6 - ; LA64-NEXT: .LBB2_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1 --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB2_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1 - ; LA64-NEXT: move $a3, $a1 -@@ -168,14 +165,13 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { - ; LA64-NEXT: bne $a2, $a3, .LBB3_5 - ; LA64-NEXT: # %bb.4: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB3_3 Depth=2 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: move $a5, $a4 - ; LA64-NEXT: sc.d $a5, 
$a0, 0 - ; LA64-NEXT: beqz $a5, .LBB3_3 - ; LA64-NEXT: b .LBB3_6 - ; LA64-NEXT: .LBB3_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB3_1 Depth=1 --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB3_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB3_1 Depth=1 - ; LA64-NEXT: bne $a2, $a3, .LBB3_1 -@@ -224,14 +220,13 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { - ; LA64-NEXT: bne $a6, $a3, .LBB4_5 - ; LA64-NEXT: # %bb.4: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB4_3 Depth=2 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: move $t0, $a7 - ; LA64-NEXT: sc.w $t0, $a2, 0 - ; LA64-NEXT: beqz $t0, .LBB4_3 - ; LA64-NEXT: b .LBB4_6 - ; LA64-NEXT: .LBB4_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1 --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB4_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1 - ; LA64-NEXT: addi.w $a7, $a3, 0 -@@ -283,14 +278,13 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { - ; LA64-NEXT: bne $a6, $a3, .LBB5_5 - ; LA64-NEXT: # %bb.4: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB5_3 Depth=2 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: move $t0, $a7 - ; LA64-NEXT: sc.w $t0, $a2, 0 - ; LA64-NEXT: beqz $t0, .LBB5_3 - ; LA64-NEXT: b .LBB5_6 - ; LA64-NEXT: .LBB5_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1 --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB5_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1 - ; LA64-NEXT: addi.w $a7, $a3, 0 -@@ -329,14 +323,13 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { - ; LA64-NEXT: bne $a2, $a4, .LBB6_5 - ; LA64-NEXT: # %bb.4: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB6_3 Depth=2 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: move $a7, $a6 - ; LA64-NEXT: sc.w $a7, $a0, 0 - ; LA64-NEXT: beqz $a7, .LBB6_3 - ; LA64-NEXT: b .LBB6_6 - ; LA64-NEXT: .LBB6_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB6_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 - ; LA64-NEXT: move $a4, $a2 -@@ -373,14 +366,13 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { - ; LA64-NEXT: bne $a2, $a3, .LBB7_5 - ; LA64-NEXT: # %bb.4: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB7_3 Depth=2 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: move $a5, $a4 - ; LA64-NEXT: sc.d $a5, $a0, 0 - ; LA64-NEXT: beqz $a5, .LBB7_3 - ; LA64-NEXT: b .LBB7_6 - ; LA64-NEXT: .LBB7_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB7_1 Depth=1 --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB7_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB7_1 Depth=1 - ; LA64-NEXT: bne $a2, $a3, .LBB7_1 -diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll -index 76e51fe7d3e8..1ac20d10e587 100644 ---- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll -+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll -@@ -21,14 +21,13 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { - ; LA64-NEXT: and $a5, $a4, $a0 - ; LA64-NEXT: bne $a5, $a1, .LBB0_3 - ; LA64-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: andn $a5, $a4, $a0 - ; LA64-NEXT: or $a5, $a5, $a2 - ; LA64-NEXT: sc.w $a5, $a3, 0 - ; LA64-NEXT: beqz $a5, .LBB0_1 - ; LA64-NEXT: b .LBB0_4 - ; LA64-NEXT: .LBB0_3: --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 
- ; LA64-NEXT: .LBB0_4: - ; LA64-NEXT: ret - %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire -@@ -56,14 +55,13 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind - ; LA64-NEXT: and $a5, $a4, $a0 - ; LA64-NEXT: bne $a5, $a1, .LBB1_3 - ; LA64-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: andn $a5, $a4, $a0 - ; LA64-NEXT: or $a5, $a5, $a2 - ; LA64-NEXT: sc.w $a5, $a3, 0 - ; LA64-NEXT: beqz $a5, .LBB1_1 - ; LA64-NEXT: b .LBB1_4 - ; LA64-NEXT: .LBB1_3: --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB1_4: - ; LA64-NEXT: ret - %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire -@@ -77,13 +75,12 @@ define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind - ; LA64-NEXT: ll.w $a3, $a0, 0 - ; LA64-NEXT: bne $a3, $a1, .LBB2_3 - ; LA64-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.w $a4, $a0, 0 - ; LA64-NEXT: beqz $a4, .LBB2_1 - ; LA64-NEXT: b .LBB2_4 - ; LA64-NEXT: .LBB2_3: --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB2_4: - ; LA64-NEXT: ret - %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire -@@ -97,13 +94,12 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind - ; LA64-NEXT: ll.d $a3, $a0, 0 - ; LA64-NEXT: bne $a3, $a1, .LBB3_3 - ; LA64-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.d $a4, $a0, 0 - ; LA64-NEXT: beqz $a4, .LBB3_1 - ; LA64-NEXT: b .LBB3_4 - ; LA64-NEXT: .LBB3_3: --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB3_4: - ; LA64-NEXT: ret - %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire -@@ -130,14 +126,13 @@ define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind - ; LA64-NEXT: and $a6, $a5, $a4 - ; LA64-NEXT: bne $a6, $a1, .LBB4_3 - ; LA64-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: andn $a6, $a5, $a4 - ; LA64-NEXT: or $a6, $a6, $a2 - ; LA64-NEXT: sc.w $a6, $a3, 0 - ; LA64-NEXT: beqz $a6, .LBB4_1 - ; LA64-NEXT: b .LBB4_4 - ; LA64-NEXT: .LBB4_3: --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB4_4: - ; LA64-NEXT: srl.w $a0, $a5, $a0 - ; LA64-NEXT: ret -@@ -167,14 +162,13 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou - ; LA64-NEXT: and $a6, $a5, $a4 - ; LA64-NEXT: bne $a6, $a1, .LBB5_3 - ; LA64-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: andn $a6, $a5, $a4 - ; LA64-NEXT: or $a6, $a6, $a2 - ; LA64-NEXT: sc.w $a6, $a3, 0 - ; LA64-NEXT: beqz $a6, .LBB5_1 - ; LA64-NEXT: b .LBB5_4 - ; LA64-NEXT: .LBB5_3: --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB5_4: - ; LA64-NEXT: srl.w $a0, $a5, $a0 - ; LA64-NEXT: ret -@@ -190,13 +184,12 @@ define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nou - ; LA64-NEXT: ll.w $a3, $a0, 0 - ; LA64-NEXT: bne $a3, $a1, .LBB6_3 - ; LA64-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.w $a4, $a0, 0 - ; LA64-NEXT: beqz $a4, .LBB6_1 - ; LA64-NEXT: b .LBB6_4 - ; LA64-NEXT: .LBB6_3: --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB6_4: - ; LA64-NEXT: move $a0, $a3 - ; LA64-NEXT: ret -@@ -212,13 +205,12 @@ define i64 @cmpxchg_i64_acquire_acquire_reti64(ptr %ptr, i64 %cmp, i64 %val) nou - ; LA64-NEXT: ll.d $a3, $a0, 0 - ; LA64-NEXT: bne 
$a3, $a1, .LBB7_3 - ; LA64-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.d $a4, $a0, 0 - ; LA64-NEXT: beqz $a4, .LBB7_1 - ; LA64-NEXT: b .LBB7_4 - ; LA64-NEXT: .LBB7_3: --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB7_4: - ; LA64-NEXT: move $a0, $a3 - ; LA64-NEXT: ret -@@ -247,14 +239,13 @@ define i1 @cmpxchg_i8_acquire_acquire_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind - ; LA64-NEXT: and $a6, $a5, $a2 - ; LA64-NEXT: bne $a6, $a1, .LBB8_3 - ; LA64-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: andn $a6, $a5, $a2 - ; LA64-NEXT: or $a6, $a6, $a0 - ; LA64-NEXT: sc.w $a6, $a3, 0 - ; LA64-NEXT: beqz $a6, .LBB8_1 - ; LA64-NEXT: b .LBB8_4 - ; LA64-NEXT: .LBB8_3: --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB8_4: - ; LA64-NEXT: and $a0, $a5, $a4 - ; LA64-NEXT: addi.w $a0, $a0, 0 -@@ -287,14 +278,13 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw - ; LA64-NEXT: and $a6, $a5, $a2 - ; LA64-NEXT: bne $a6, $a1, .LBB9_3 - ; LA64-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: andn $a6, $a5, $a2 - ; LA64-NEXT: or $a6, $a6, $a0 - ; LA64-NEXT: sc.w $a6, $a3, 0 - ; LA64-NEXT: beqz $a6, .LBB9_1 - ; LA64-NEXT: b .LBB9_4 - ; LA64-NEXT: .LBB9_3: --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB9_4: - ; LA64-NEXT: and $a0, $a5, $a4 - ; LA64-NEXT: addi.w $a0, $a0, 0 -@@ -313,13 +303,12 @@ define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounw - ; LA64-NEXT: ll.w $a3, $a0, 0 - ; LA64-NEXT: bne $a3, $a1, .LBB10_3 - ; LA64-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.w $a4, $a0, 0 - ; LA64-NEXT: beqz $a4, .LBB10_1 - ; LA64-NEXT: b .LBB10_4 - ; LA64-NEXT: .LBB10_3: --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB10_4: - ; LA64-NEXT: addi.w $a0, $a1, 0 - ; LA64-NEXT: xor $a0, $a3, $a0 -@@ -337,13 +326,12 @@ define i1 @cmpxchg_i64_acquire_acquire_reti1(ptr %ptr, i64 %cmp, i64 %val) nounw - ; LA64-NEXT: ll.d $a3, $a0, 0 - ; LA64-NEXT: bne $a3, $a1, .LBB11_3 - ; LA64-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.d $a4, $a0, 0 - ; LA64-NEXT: beqz $a4, .LBB11_1 - ; LA64-NEXT: b .LBB11_4 - ; LA64-NEXT: .LBB11_3: --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB11_4: - ; LA64-NEXT: xor $a0, $a3, $a1 - ; LA64-NEXT: sltui $a0, $a0, 1 -@@ -352,3 +340,343 @@ define i1 @cmpxchg_i64_acquire_acquire_reti1(ptr %ptr, i64 %cmp, i64 %val) nounw - %res = extractvalue { i64, i1 } %tmp, 1 - ret i1 %res - } -+ -+define void @cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { -+; LA64-LABEL: cmpxchg_i8_monotonic_monotonic: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a3, $zero, -4 -+; LA64-NEXT: and $a3, $a0, $a3 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: andi $a2, $a2, 255 -+; LA64-NEXT: sll.w $a2, $a2, $a0 -+; LA64-NEXT: ori $a4, $zero, 255 -+; LA64-NEXT: sll.w $a0, $a4, $a0 -+; LA64-NEXT: addi.w $a0, $a0, 0 -+; LA64-NEXT: addi.w $a2, $a2, 0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a3, 0 -+; LA64-NEXT: and $a5, $a4, $a0 -+; LA64-NEXT: bne $a5, $a1, .LBB12_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 -+; LA64-NEXT: andn $a5, 
$a4, $a0 -+; LA64-NEXT: or $a5, $a5, $a2 -+; LA64-NEXT: sc.w $a5, $a3, 0 -+; LA64-NEXT: beqz $a5, .LBB12_1 -+; LA64-NEXT: b .LBB12_4 -+; LA64-NEXT: .LBB12_3: -+; LA64-NEXT: dbar 1792 -+; LA64-NEXT: .LBB12_4: -+; LA64-NEXT: ret -+ %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic -+ ret void -+} -+ -+define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwind { -+; LA64-LABEL: cmpxchg_i16_monotonic_monotonic: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a3, $zero, -4 -+; LA64-NEXT: and $a3, $a0, $a3 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0 -+; LA64-NEXT: sll.w $a2, $a2, $a0 -+; LA64-NEXT: lu12i.w $a4, 15 -+; LA64-NEXT: ori $a4, $a4, 4095 -+; LA64-NEXT: sll.w $a0, $a4, $a0 -+; LA64-NEXT: addi.w $a0, $a0, 0 -+; LA64-NEXT: addi.w $a2, $a2, 0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a3, 0 -+; LA64-NEXT: and $a5, $a4, $a0 -+; LA64-NEXT: bne $a5, $a1, .LBB13_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 -+; LA64-NEXT: andn $a5, $a4, $a0 -+; LA64-NEXT: or $a5, $a5, $a2 -+; LA64-NEXT: sc.w $a5, $a3, 0 -+; LA64-NEXT: beqz $a5, .LBB13_1 -+; LA64-NEXT: b .LBB13_4 -+; LA64-NEXT: .LBB13_3: -+; LA64-NEXT: dbar 1792 -+; LA64-NEXT: .LBB13_4: -+; LA64-NEXT: ret -+ %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic -+ ret void -+} -+ -+define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { -+; LA64-LABEL: cmpxchg_i32_monotonic_monotonic: -+; LA64: # %bb.0: -+; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a3, $a0, 0 -+; LA64-NEXT: bne $a3, $a1, .LBB14_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 -+; LA64-NEXT: move $a4, $a2 -+; LA64-NEXT: sc.w $a4, $a0, 0 -+; LA64-NEXT: beqz $a4, .LBB14_1 -+; LA64-NEXT: b .LBB14_4 -+; LA64-NEXT: .LBB14_3: -+; LA64-NEXT: dbar 1792 -+; LA64-NEXT: .LBB14_4: -+; LA64-NEXT: ret -+ %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic -+ ret void -+} -+ -+define void @cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind { -+; LA64-LABEL: cmpxchg_i64_monotonic_monotonic: -+; LA64: # %bb.0: -+; LA64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.d $a3, $a0, 0 -+; LA64-NEXT: bne $a3, $a1, .LBB15_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -+; LA64-NEXT: move $a4, $a2 -+; LA64-NEXT: sc.d $a4, $a0, 0 -+; LA64-NEXT: beqz $a4, .LBB15_1 -+; LA64-NEXT: b .LBB15_4 -+; LA64-NEXT: .LBB15_3: -+; LA64-NEXT: dbar 1792 -+; LA64-NEXT: .LBB15_4: -+; LA64-NEXT: ret -+ %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic -+ ret void -+} -+ -+define i8 @cmpxchg_i8_monotonic_monotonic_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind { -+; LA64-LABEL: cmpxchg_i8_monotonic_monotonic_reti8: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a3, $zero, -4 -+; LA64-NEXT: and $a3, $a0, $a3 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a4, $zero, 255 -+; LA64-NEXT: sll.w $a4, $a4, $a0 -+; LA64-NEXT: addi.w $a4, $a4, 0 -+; LA64-NEXT: andi $a2, $a2, 255 -+; LA64-NEXT: sll.w $a2, $a2, $a0 -+; LA64-NEXT: addi.w $a2, $a2, 0 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a5, $a3, 0 -+; LA64-NEXT: and $a6, $a5, $a4 -+; LA64-NEXT: bne $a6, $a1, .LBB16_3 -+; LA64-NEXT: # %bb.2: # in Loop: 
Header=BB16_1 Depth=1 -+; LA64-NEXT: andn $a6, $a5, $a4 -+; LA64-NEXT: or $a6, $a6, $a2 -+; LA64-NEXT: sc.w $a6, $a3, 0 -+; LA64-NEXT: beqz $a6, .LBB16_1 -+; LA64-NEXT: b .LBB16_4 -+; LA64-NEXT: .LBB16_3: -+; LA64-NEXT: dbar 1792 -+; LA64-NEXT: .LBB16_4: -+; LA64-NEXT: srl.w $a0, $a5, $a0 -+; LA64-NEXT: ret -+ %tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic -+ %res = extractvalue { i8, i1 } %tmp, 0 -+ ret i8 %res -+} -+ -+define i16 @cmpxchg_i16_monotonic_monotonic_reti16(ptr %ptr, i16 %cmp, i16 %val) nounwind { -+; LA64-LABEL: cmpxchg_i16_monotonic_monotonic_reti16: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a3, $zero, -4 -+; LA64-NEXT: and $a3, $a0, $a3 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: lu12i.w $a4, 15 -+; LA64-NEXT: ori $a4, $a4, 4095 -+; LA64-NEXT: sll.w $a4, $a4, $a0 -+; LA64-NEXT: addi.w $a4, $a4, 0 -+; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0 -+; LA64-NEXT: sll.w $a2, $a2, $a0 -+; LA64-NEXT: addi.w $a2, $a2, 0 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a5, $a3, 0 -+; LA64-NEXT: and $a6, $a5, $a4 -+; LA64-NEXT: bne $a6, $a1, .LBB17_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 -+; LA64-NEXT: andn $a6, $a5, $a4 -+; LA64-NEXT: or $a6, $a6, $a2 -+; LA64-NEXT: sc.w $a6, $a3, 0 -+; LA64-NEXT: beqz $a6, .LBB17_1 -+; LA64-NEXT: b .LBB17_4 -+; LA64-NEXT: .LBB17_3: -+; LA64-NEXT: dbar 1792 -+; LA64-NEXT: .LBB17_4: -+; LA64-NEXT: srl.w $a0, $a5, $a0 -+; LA64-NEXT: ret -+ %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic -+ %res = extractvalue { i16, i1 } %tmp, 0 -+ ret i16 %res -+} -+ -+define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind { -+; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti32: -+; LA64: # %bb.0: -+; LA64-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a3, $a0, 0 -+; LA64-NEXT: bne $a3, $a1, .LBB18_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 -+; LA64-NEXT: move $a4, $a2 -+; LA64-NEXT: sc.w $a4, $a0, 0 -+; LA64-NEXT: beqz $a4, .LBB18_1 -+; LA64-NEXT: b .LBB18_4 -+; LA64-NEXT: .LBB18_3: -+; LA64-NEXT: dbar 1792 -+; LA64-NEXT: .LBB18_4: -+; LA64-NEXT: move $a0, $a3 -+; LA64-NEXT: ret -+ %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic -+ %res = extractvalue { i32, i1 } %tmp, 0 -+ ret i32 %res -+} -+ -+define i64 @cmpxchg_i64_monotonic_monotonic_reti64(ptr %ptr, i64 %cmp, i64 %val) nounwind { -+; LA64-LABEL: cmpxchg_i64_monotonic_monotonic_reti64: -+; LA64: # %bb.0: -+; LA64-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.d $a3, $a0, 0 -+; LA64-NEXT: bne $a3, $a1, .LBB19_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 -+; LA64-NEXT: move $a4, $a2 -+; LA64-NEXT: sc.d $a4, $a0, 0 -+; LA64-NEXT: beqz $a4, .LBB19_1 -+; LA64-NEXT: b .LBB19_4 -+; LA64-NEXT: .LBB19_3: -+; LA64-NEXT: dbar 1792 -+; LA64-NEXT: .LBB19_4: -+; LA64-NEXT: move $a0, $a3 -+; LA64-NEXT: ret -+ %tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic -+ %res = extractvalue { i64, i1 } %tmp, 0 -+ ret i64 %res -+} -+ -+define i1 @cmpxchg_i8_monotonic_monotonic_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind { -+; LA64-LABEL: cmpxchg_i8_monotonic_monotonic_reti1: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a3, $zero, -4 -+; LA64-NEXT: and $a3, $a0, $a3 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a4, $zero, 255 -+; LA64-NEXT: sll.w $a4, $a4, $a0 -+; LA64-NEXT: andi $a1, $a1, 255 -+; 
LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: andi $a2, $a2, 255 -+; LA64-NEXT: sll.w $a0, $a2, $a0 -+; LA64-NEXT: addi.w $a0, $a0, 0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: addi.w $a2, $a4, 0 -+; LA64-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a5, $a3, 0 -+; LA64-NEXT: and $a6, $a5, $a2 -+; LA64-NEXT: bne $a6, $a1, .LBB20_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 -+; LA64-NEXT: andn $a6, $a5, $a2 -+; LA64-NEXT: or $a6, $a6, $a0 -+; LA64-NEXT: sc.w $a6, $a3, 0 -+; LA64-NEXT: beqz $a6, .LBB20_1 -+; LA64-NEXT: b .LBB20_4 -+; LA64-NEXT: .LBB20_3: -+; LA64-NEXT: dbar 1792 -+; LA64-NEXT: .LBB20_4: -+; LA64-NEXT: and $a0, $a5, $a4 -+; LA64-NEXT: addi.w $a0, $a0, 0 -+; LA64-NEXT: xor $a0, $a1, $a0 -+; LA64-NEXT: sltui $a0, $a0, 1 -+; LA64-NEXT: ret -+ %tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic -+ %res = extractvalue { i8, i1 } %tmp, 1 -+ ret i1 %res -+} -+ -+define i1 @cmpxchg_i16_monotonic_monotonic_reti1(ptr %ptr, i16 %cmp, i16 %val) nounwind { -+; LA64-LABEL: cmpxchg_i16_monotonic_monotonic_reti1: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a3, $zero, -4 -+; LA64-NEXT: and $a3, $a0, $a3 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: lu12i.w $a4, 15 -+; LA64-NEXT: ori $a4, $a4, 4095 -+; LA64-NEXT: sll.w $a4, $a4, $a0 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0 -+; LA64-NEXT: sll.w $a0, $a2, $a0 -+; LA64-NEXT: addi.w $a0, $a0, 0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: addi.w $a2, $a4, 0 -+; LA64-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a5, $a3, 0 -+; LA64-NEXT: and $a6, $a5, $a2 -+; LA64-NEXT: bne $a6, $a1, .LBB21_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 -+; LA64-NEXT: andn $a6, $a5, $a2 -+; LA64-NEXT: or $a6, $a6, $a0 -+; LA64-NEXT: sc.w $a6, $a3, 0 -+; LA64-NEXT: beqz $a6, .LBB21_1 -+; LA64-NEXT: b .LBB21_4 -+; LA64-NEXT: .LBB21_3: -+; LA64-NEXT: dbar 1792 -+; LA64-NEXT: .LBB21_4: -+; LA64-NEXT: and $a0, $a5, $a4 -+; LA64-NEXT: addi.w $a0, $a0, 0 -+; LA64-NEXT: xor $a0, $a1, $a0 -+; LA64-NEXT: sltui $a0, $a0, 1 -+; LA64-NEXT: ret -+ %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic -+ %res = extractvalue { i16, i1 } %tmp, 1 -+ ret i1 %res -+} -+ -+define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind { -+; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti1: -+; LA64: # %bb.0: -+; LA64-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a3, $a0, 0 -+; LA64-NEXT: bne $a3, $a1, .LBB22_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 -+; LA64-NEXT: move $a4, $a2 -+; LA64-NEXT: sc.w $a4, $a0, 0 -+; LA64-NEXT: beqz $a4, .LBB22_1 -+; LA64-NEXT: b .LBB22_4 -+; LA64-NEXT: .LBB22_3: -+; LA64-NEXT: dbar 1792 -+; LA64-NEXT: .LBB22_4: -+; LA64-NEXT: addi.w $a0, $a1, 0 -+; LA64-NEXT: xor $a0, $a3, $a0 -+; LA64-NEXT: sltui $a0, $a0, 1 -+; LA64-NEXT: ret -+ %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic -+ %res = extractvalue { i32, i1 } %tmp, 1 -+ ret i1 %res -+} -+ -+define i1 @cmpxchg_i64_monotonic_monotonic_reti1(ptr %ptr, i64 %cmp, i64 %val) nounwind { -+; LA64-LABEL: cmpxchg_i64_monotonic_monotonic_reti1: -+; LA64: # %bb.0: -+; LA64-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.d $a3, $a0, 0 -+; LA64-NEXT: bne $a3, $a1, .LBB23_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 -+; LA64-NEXT: move $a4, $a2 -+; LA64-NEXT: sc.d $a4, $a0, 0 -+; LA64-NEXT: beqz $a4, .LBB23_1 -+; 
LA64-NEXT: b .LBB23_4 -+; LA64-NEXT: .LBB23_3: -+; LA64-NEXT: dbar 1792 -+; LA64-NEXT: .LBB23_4: -+; LA64-NEXT: xor $a0, $a3, $a1 -+; LA64-NEXT: sltui $a0, $a0, 1 -+; LA64-NEXT: ret -+ %tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic -+ %res = extractvalue { i64, i1 } %tmp, 1 -+ ret i1 %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll -index 9767717395b6..9a29d67e9982 100644 ---- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll -+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll -@@ -25,14 +25,13 @@ define float @float_fadd_acquire(ptr %p) nounwind { - ; LA64F-NEXT: bne $a3, $a2, .LBB0_5 - ; LA64F-NEXT: # %bb.4: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB0_3 Depth=2 --; LA64F-NEXT: dbar 0 - ; LA64F-NEXT: move $a4, $a1 - ; LA64F-NEXT: sc.w $a4, $a0, 0 - ; LA64F-NEXT: beqz $a4, .LBB0_3 - ; LA64F-NEXT: b .LBB0_6 - ; LA64F-NEXT: .LBB0_5: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1 --; LA64F-NEXT: dbar 1792 -+; LA64F-NEXT: dbar 0 - ; LA64F-NEXT: .LBB0_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -@@ -61,14 +60,13 @@ define float @float_fadd_acquire(ptr %p) nounwind { - ; LA64D-NEXT: bne $a3, $a2, .LBB0_5 - ; LA64D-NEXT: # %bb.4: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB0_3 Depth=2 --; LA64D-NEXT: dbar 0 - ; LA64D-NEXT: move $a4, $a1 - ; LA64D-NEXT: sc.w $a4, $a0, 0 - ; LA64D-NEXT: beqz $a4, .LBB0_3 - ; LA64D-NEXT: b .LBB0_6 - ; LA64D-NEXT: .LBB0_5: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1 --; LA64D-NEXT: dbar 1792 -+; LA64D-NEXT: dbar 0 - ; LA64D-NEXT: .LBB0_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -@@ -101,14 +99,13 @@ define float @float_fsub_acquire(ptr %p) nounwind { - ; LA64F-NEXT: bne $a3, $a2, .LBB1_5 - ; LA64F-NEXT: # %bb.4: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB1_3 Depth=2 --; LA64F-NEXT: dbar 0 - ; LA64F-NEXT: move $a4, $a1 - ; LA64F-NEXT: sc.w $a4, $a0, 0 - ; LA64F-NEXT: beqz $a4, .LBB1_3 - ; LA64F-NEXT: b .LBB1_6 - ; LA64F-NEXT: .LBB1_5: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB1_1 Depth=1 --; LA64F-NEXT: dbar 1792 -+; LA64F-NEXT: dbar 0 - ; LA64F-NEXT: .LBB1_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB1_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -@@ -137,14 +134,13 @@ define float @float_fsub_acquire(ptr %p) nounwind { - ; LA64D-NEXT: bne $a3, $a2, .LBB1_5 - ; LA64D-NEXT: # %bb.4: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB1_3 Depth=2 --; LA64D-NEXT: dbar 0 - ; LA64D-NEXT: move $a4, $a1 - ; LA64D-NEXT: sc.w $a4, $a0, 0 - ; LA64D-NEXT: beqz $a4, .LBB1_3 - ; LA64D-NEXT: b .LBB1_6 - ; LA64D-NEXT: .LBB1_5: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1 --; LA64D-NEXT: dbar 1792 -+; LA64D-NEXT: dbar 0 - ; LA64D-NEXT: .LBB1_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -@@ -178,14 +174,13 @@ define float @float_fmin_acquire(ptr %p) nounwind { - ; LA64F-NEXT: bne $a3, $a2, .LBB2_5 - ; LA64F-NEXT: # %bb.4: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB2_3 Depth=2 --; LA64F-NEXT: dbar 0 - ; LA64F-NEXT: move $a4, $a1 - ; LA64F-NEXT: sc.w $a4, $a0, 0 - ; LA64F-NEXT: beqz $a4, .LBB2_3 - ; LA64F-NEXT: b .LBB2_6 - ; LA64F-NEXT: .LBB2_5: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB2_1 Depth=1 --; 
LA64F-NEXT: dbar 1792 -+; LA64F-NEXT: dbar 0 - ; LA64F-NEXT: .LBB2_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB2_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -@@ -215,14 +210,13 @@ define float @float_fmin_acquire(ptr %p) nounwind { - ; LA64D-NEXT: bne $a3, $a2, .LBB2_5 - ; LA64D-NEXT: # %bb.4: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB2_3 Depth=2 --; LA64D-NEXT: dbar 0 - ; LA64D-NEXT: move $a4, $a1 - ; LA64D-NEXT: sc.w $a4, $a0, 0 - ; LA64D-NEXT: beqz $a4, .LBB2_3 - ; LA64D-NEXT: b .LBB2_6 - ; LA64D-NEXT: .LBB2_5: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB2_1 Depth=1 --; LA64D-NEXT: dbar 1792 -+; LA64D-NEXT: dbar 0 - ; LA64D-NEXT: .LBB2_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB2_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -@@ -256,14 +250,13 @@ define float @float_fmax_acquire(ptr %p) nounwind { - ; LA64F-NEXT: bne $a3, $a2, .LBB3_5 - ; LA64F-NEXT: # %bb.4: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB3_3 Depth=2 --; LA64F-NEXT: dbar 0 - ; LA64F-NEXT: move $a4, $a1 - ; LA64F-NEXT: sc.w $a4, $a0, 0 - ; LA64F-NEXT: beqz $a4, .LBB3_3 - ; LA64F-NEXT: b .LBB3_6 - ; LA64F-NEXT: .LBB3_5: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB3_1 Depth=1 --; LA64F-NEXT: dbar 1792 -+; LA64F-NEXT: dbar 0 - ; LA64F-NEXT: .LBB3_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB3_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -@@ -293,14 +286,13 @@ define float @float_fmax_acquire(ptr %p) nounwind { - ; LA64D-NEXT: bne $a3, $a2, .LBB3_5 - ; LA64D-NEXT: # %bb.4: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB3_3 Depth=2 --; LA64D-NEXT: dbar 0 - ; LA64D-NEXT: move $a4, $a1 - ; LA64D-NEXT: sc.w $a4, $a0, 0 - ; LA64D-NEXT: beqz $a4, .LBB3_3 - ; LA64D-NEXT: b .LBB3_6 - ; LA64D-NEXT: .LBB3_5: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB3_1 Depth=1 --; LA64D-NEXT: dbar 1792 -+; LA64D-NEXT: dbar 0 - ; LA64D-NEXT: .LBB3_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB3_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll -index cd4a9e7fa9c4..26ba77e8d4fd 100644 ---- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll -+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll -@@ -17,7 +17,6 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.w $a4, $a2, 0 - ; LA64-NEXT: and $a6, $a4, $a3 - ; LA64-NEXT: move $a5, $a4 -@@ -30,8 +29,6 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA64-NEXT: sc.w $a5, $a2, 0 - ; LA64-NEXT: beqz $a5, .LBB0_1 - ; LA64-NEXT: # %bb.4: --; LA64-NEXT: dbar 1792 --; LA64-NEXT: # %bb.5: - ; LA64-NEXT: srl.w $a0, $a4, $a0 - ; LA64-NEXT: ret - %1 = atomicrmw umax ptr %a, i8 %b acquire -@@ -52,7 +49,6 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.w $a4, $a2, 0 - ; LA64-NEXT: and $a6, $a4, $a3 - ; LA64-NEXT: move $a5, $a4 -@@ -65,8 +61,6 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA64-NEXT: sc.w $a5, $a2, 0 - ; LA64-NEXT: beqz $a5, .LBB1_1 - ; LA64-NEXT: # %bb.4: --; LA64-NEXT: dbar 1792 --; LA64-NEXT: # 
%bb.5: - ; LA64-NEXT: srl.w $a0, $a4, $a0 - ; LA64-NEXT: ret - %1 = atomicrmw umax ptr %a, i16 %b acquire -@@ -106,7 +100,6 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.w $a4, $a2, 0 - ; LA64-NEXT: and $a6, $a4, $a3 - ; LA64-NEXT: move $a5, $a4 -@@ -119,8 +112,6 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA64-NEXT: sc.w $a5, $a2, 0 - ; LA64-NEXT: beqz $a5, .LBB4_1 - ; LA64-NEXT: # %bb.4: --; LA64-NEXT: dbar 1792 --; LA64-NEXT: # %bb.5: - ; LA64-NEXT: srl.w $a0, $a4, $a0 - ; LA64-NEXT: ret - %1 = atomicrmw umin ptr %a, i8 %b acquire -@@ -141,7 +132,6 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.w $a4, $a2, 0 - ; LA64-NEXT: and $a6, $a4, $a3 - ; LA64-NEXT: move $a5, $a4 -@@ -154,8 +144,6 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA64-NEXT: sc.w $a5, $a2, 0 - ; LA64-NEXT: beqz $a5, .LBB5_1 - ; LA64-NEXT: # %bb.4: --; LA64-NEXT: dbar 1792 --; LA64-NEXT: # %bb.5: - ; LA64-NEXT: srl.w $a0, $a4, $a0 - ; LA64-NEXT: ret - %1 = atomicrmw umin ptr %a, i16 %b acquire -@@ -197,7 +185,6 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA64-NEXT: andi $a4, $a0, 24 - ; LA64-NEXT: xori $a4, $a4, 56 - ; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.w $a5, $a2, 0 - ; LA64-NEXT: and $a7, $a5, $a3 - ; LA64-NEXT: move $a6, $a5 -@@ -212,8 +199,6 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA64-NEXT: sc.w $a6, $a2, 0 - ; LA64-NEXT: beqz $a6, .LBB8_1 - ; LA64-NEXT: # %bb.4: --; LA64-NEXT: dbar 1792 --; LA64-NEXT: # %bb.5: - ; LA64-NEXT: srl.w $a0, $a5, $a0 - ; LA64-NEXT: ret - %1 = atomicrmw max ptr %a, i8 %b acquire -@@ -237,7 +222,6 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.w $a5, $a2, 0 - ; LA64-NEXT: and $a7, $a5, $a4 - ; LA64-NEXT: move $a6, $a5 -@@ -252,8 +236,6 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA64-NEXT: sc.w $a6, $a2, 0 - ; LA64-NEXT: beqz $a6, .LBB9_1 - ; LA64-NEXT: # %bb.4: --; LA64-NEXT: dbar 1792 --; LA64-NEXT: # %bb.5: - ; LA64-NEXT: srl.w $a0, $a5, $a0 - ; LA64-NEXT: ret - %1 = atomicrmw max ptr %a, i16 %b acquire -@@ -295,7 +277,6 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA64-NEXT: andi $a4, $a0, 24 - ; LA64-NEXT: xori $a4, $a4, 56 - ; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.w $a5, $a2, 0 - ; LA64-NEXT: and $a7, $a5, $a3 - ; LA64-NEXT: move $a6, $a5 -@@ -310,8 +291,6 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA64-NEXT: sc.w $a6, $a2, 0 - ; LA64-NEXT: beqz $a6, .LBB12_1 - ; LA64-NEXT: # %bb.4: --; LA64-NEXT: dbar 1792 --; LA64-NEXT: # %bb.5: - ; LA64-NEXT: srl.w $a0, $a5, $a0 - ; LA64-NEXT: ret - %1 = atomicrmw min ptr %a, i8 %b acquire -@@ -335,7 +314,6 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB13_1: # =>This Inner Loop 
Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.w $a5, $a2, 0 - ; LA64-NEXT: and $a7, $a5, $a4 - ; LA64-NEXT: move $a6, $a5 -@@ -350,8 +328,6 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA64-NEXT: sc.w $a6, $a2, 0 - ; LA64-NEXT: beqz $a6, .LBB13_1 - ; LA64-NEXT: # %bb.4: --; LA64-NEXT: dbar 1792 --; LA64-NEXT: # %bb.5: - ; LA64-NEXT: srl.w $a0, $a5, $a0 - ; LA64-NEXT: ret - %1 = atomicrmw min ptr %a, i16 %b acquire -diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll -index c077d14f728f..626276ba05f7 100644 ---- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll -+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll -@@ -13,7 +13,6 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA32-NEXT: andi $a1, $a1, 255 - ; LA32-NEXT: sll.w $a1, $a1, $a0 - ; LA32-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a4, $a2, 0 - ; LA32-NEXT: addi.w $a5, $a1, 0 - ; LA32-NEXT: xor $a5, $a4, $a5 -@@ -37,7 +36,6 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.w $a4, $a2, 0 - ; LA64-NEXT: addi.w $a5, $a1, 0 - ; LA64-NEXT: xor $a5, $a4, $a5 -@@ -64,7 +62,6 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 - ; LA32-NEXT: sll.w $a1, $a1, $a0 - ; LA32-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a4, $a2, 0 - ; LA32-NEXT: addi.w $a5, $a1, 0 - ; LA32-NEXT: xor $a5, $a4, $a5 -@@ -89,7 +86,6 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.w $a4, $a2, 0 - ; LA64-NEXT: addi.w $a5, $a1, 0 - ; LA64-NEXT: xor $a5, $a4, $a5 -@@ -108,7 +104,6 @@ define i32 @atomicrmw_xchg_i32_acquire(ptr %a, i32 %b) nounwind { - ; LA32-LABEL: atomicrmw_xchg_i32_acquire: - ; LA32: # %bb.0: - ; LA32-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a2, $a0, 0 - ; LA32-NEXT: move $a3, $a1 - ; LA32-NEXT: sc.w $a3, $a0, 0 -@@ -157,7 +152,6 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA32-NEXT: andi $a1, $a1, 255 - ; LA32-NEXT: sll.w $a1, $a1, $a0 - ; LA32-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a4, $a2, 0 - ; LA32-NEXT: add.w $a5, $a4, $a1 - ; LA32-NEXT: xor $a5, $a4, $a5 -@@ -181,7 +175,6 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.w $a4, $a2, 0 - ; LA64-NEXT: add.w $a5, $a4, $a1 - ; LA64-NEXT: xor $a5, $a4, $a5 -@@ -208,7 +201,6 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 - ; LA32-NEXT: sll.w $a1, $a1, $a0 - ; LA32-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a4, $a2, 0 - ; LA32-NEXT: add.w $a5, $a4, $a1 - ; LA32-NEXT: xor $a5, $a4, $a5 -@@ -233,7 +225,6 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; 
LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.w $a4, $a2, 0 - ; LA64-NEXT: add.w $a5, $a4, $a1 - ; LA64-NEXT: xor $a5, $a4, $a5 -@@ -252,7 +243,6 @@ define i32 @atomicrmw_add_i32_acquire(ptr %a, i32 %b) nounwind { - ; LA32-LABEL: atomicrmw_add_i32_acquire: - ; LA32: # %bb.0: - ; LA32-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a2, $a0, 0 - ; LA32-NEXT: add.w $a3, $a2, $a1 - ; LA32-NEXT: sc.w $a3, $a0, 0 -@@ -301,7 +291,6 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA32-NEXT: andi $a1, $a1, 255 - ; LA32-NEXT: sll.w $a1, $a1, $a0 - ; LA32-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a4, $a2, 0 - ; LA32-NEXT: sub.w $a5, $a4, $a1 - ; LA32-NEXT: xor $a5, $a4, $a5 -@@ -325,7 +314,6 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.w $a4, $a2, 0 - ; LA64-NEXT: sub.w $a5, $a4, $a1 - ; LA64-NEXT: xor $a5, $a4, $a5 -@@ -352,7 +340,6 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 - ; LA32-NEXT: sll.w $a1, $a1, $a0 - ; LA32-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a4, $a2, 0 - ; LA32-NEXT: sub.w $a5, $a4, $a1 - ; LA32-NEXT: xor $a5, $a4, $a5 -@@ -377,7 +364,6 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.w $a4, $a2, 0 - ; LA64-NEXT: sub.w $a5, $a4, $a1 - ; LA64-NEXT: xor $a5, $a4, $a5 -@@ -396,7 +382,6 @@ define i32 @atomicrmw_sub_i32_acquire(ptr %a, i32 %b) nounwind { - ; LA32-LABEL: atomicrmw_sub_i32_acquire: - ; LA32: # %bb.0: - ; LA32-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a2, $a0, 0 - ; LA32-NEXT: sub.w $a3, $a2, $a1 - ; LA32-NEXT: sc.w $a3, $a0, 0 -@@ -447,7 +432,6 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA32-NEXT: andi $a1, $a1, 255 - ; LA32-NEXT: sll.w $a1, $a1, $a0 - ; LA32-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a4, $a2, 0 - ; LA32-NEXT: and $a5, $a4, $a1 - ; LA32-NEXT: nor $a5, $a5, $zero -@@ -472,7 +456,6 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.w $a4, $a2, 0 - ; LA64-NEXT: and $a5, $a4, $a1 - ; LA64-NEXT: nor $a5, $a5, $zero -@@ -500,7 +483,6 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 - ; LA32-NEXT: sll.w $a1, $a1, $a0 - ; LA32-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a4, $a2, 0 - ; LA32-NEXT: and $a5, $a4, $a1 - ; LA32-NEXT: nor $a5, $a5, $zero -@@ -526,7 +508,6 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.w $a4, $a2, 0 - ; LA64-NEXT: and $a5, $a4, $a1 - ; 
LA64-NEXT: nor $a5, $a5, $zero -@@ -546,7 +527,6 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind { - ; LA32-LABEL: atomicrmw_nand_i32_acquire: - ; LA32: # %bb.0: - ; LA32-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a2, $a0, 0 - ; LA32-NEXT: and $a3, $a2, $a1 - ; LA32-NEXT: nor $a3, $a3, $zero -@@ -559,7 +539,6 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind { - ; LA64-LABEL: atomicrmw_nand_i32_acquire: - ; LA64: # %bb.0: - ; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.w $a2, $a0, 0 - ; LA64-NEXT: and $a3, $a2, $a1 - ; LA64-NEXT: nor $a3, $a3, $zero -@@ -586,7 +565,6 @@ define i64 @atomicrmw_nand_i64_acquire(ptr %a, i64 %b) nounwind { - ; LA64-LABEL: atomicrmw_nand_i64_acquire: - ; LA64: # %bb.0: - ; LA64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.d $a2, $a0, 0 - ; LA64-NEXT: and $a3, $a2, $a1 - ; LA64-NEXT: nor $a3, $a3, $zero -@@ -611,7 +589,6 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA32-NEXT: addi.w $a3, $zero, -4 - ; LA32-NEXT: and $a0, $a0, $a3 - ; LA32-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a3, $a0, 0 - ; LA32-NEXT: and $a4, $a3, $a1 - ; LA32-NEXT: sc.w $a4, $a0, 0 -@@ -650,7 +627,6 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA32-NEXT: addi.w $a2, $zero, -4 - ; LA32-NEXT: and $a0, $a0, $a2 - ; LA32-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a2, $a0, 0 - ; LA32-NEXT: and $a4, $a2, $a1 - ; LA32-NEXT: sc.w $a4, $a0, 0 -@@ -681,7 +657,6 @@ define i32 @atomicrmw_and_i32_acquire(ptr %a, i32 %b) nounwind { - ; LA32-LABEL: atomicrmw_and_i32_acquire: - ; LA32: # %bb.0: - ; LA32-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a2, $a0, 0 - ; LA32-NEXT: and $a3, $a2, $a1 - ; LA32-NEXT: sc.w $a3, $a0, 0 -@@ -728,7 +703,6 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA32-NEXT: andi $a1, $a1, 255 - ; LA32-NEXT: sll.w $a1, $a1, $a0 - ; LA32-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a3, $a2, 0 - ; LA32-NEXT: or $a4, $a3, $a1 - ; LA32-NEXT: sc.w $a4, $a2, 0 -@@ -760,7 +734,6 @@ define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 - ; LA32-NEXT: sll.w $a1, $a1, $a0 - ; LA32-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a3, $a2, 0 - ; LA32-NEXT: or $a4, $a3, $a1 - ; LA32-NEXT: sc.w $a4, $a2, 0 -@@ -787,7 +760,6 @@ define i32 @atomicrmw_or_i32_acquire(ptr %a, i32 %b) nounwind { - ; LA32-LABEL: atomicrmw_or_i32_acquire: - ; LA32: # %bb.0: - ; LA32-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a2, $a0, 0 - ; LA32-NEXT: or $a3, $a2, $a1 - ; LA32-NEXT: sc.w $a3, $a0, 0 -@@ -834,7 +806,6 @@ define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA32-NEXT: andi $a1, $a1, 255 - ; LA32-NEXT: sll.w $a1, $a1, $a0 - ; LA32-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a3, $a2, 0 - ; LA32-NEXT: xor $a4, $a3, $a1 - ; LA32-NEXT: sc.w $a4, $a2, 0 -@@ -866,7 +837,6 @@ define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 - ; LA32-NEXT: sll.w $a1, $a1, $a0 - ; LA32-NEXT: .LBB25_1: # =>This 
Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a3, $a2, 0 - ; LA32-NEXT: xor $a4, $a3, $a1 - ; LA32-NEXT: sc.w $a4, $a2, 0 -@@ -893,7 +863,6 @@ define i32 @atomicrmw_xor_i32_acquire(ptr %a, i32 %b) nounwind { - ; LA32-LABEL: atomicrmw_xor_i32_acquire: - ; LA32: # %bb.0: - ; LA32-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a2, $a0, 0 - ; LA32-NEXT: xor $a3, $a2, $a1 - ; LA32-NEXT: sc.w $a3, $a0, 0 -diff --git a/llvm/unittests/Target/LoongArch/InstSizes.cpp b/llvm/unittests/Target/LoongArch/InstSizes.cpp -index 1a5d4369c48b..3180c7237a79 100644 ---- a/llvm/unittests/Target/LoongArch/InstSizes.cpp -+++ b/llvm/unittests/Target/LoongArch/InstSizes.cpp -@@ -121,7 +121,7 @@ TEST(InstSizes, AtomicPseudo) { - " dead early-clobber renamable $r10, dead early-clobber renamable $r11 = PseudoAtomicLoadAdd32 renamable $r7, renamable $r6, renamable $r8\n" - " dead early-clobber renamable $r5, dead early-clobber renamable $r9, dead early-clobber renamable $r10 = PseudoMaskedAtomicLoadUMax32 renamable $r7, renamable $r6, renamable $r8, 4\n" - " early-clobber renamable $r9, dead early-clobber renamable $r10, dead early-clobber renamable $r11 = PseudoMaskedAtomicLoadMax32 killed renamable $r6, killed renamable $r5, killed renamable $r7, killed renamable $r8, 4\n" -- " dead early-clobber renamable $r5, dead early-clobber renamable $r9 = PseudoCmpXchg32 renamable $r7, renamable $r4, renamable $r6\n" -+ " dead early-clobber renamable $r5, dead early-clobber renamable $r9 = PseudoCmpXchg32 renamable $r7, renamable $r4, renamable $r6, 4\n" - " dead early-clobber renamable $r5, dead early-clobber renamable $r9 = PseudoMaskedCmpXchg32 killed renamable $r7, killed renamable $r4, killed renamable $r6, killed renamable $r8, 4\n", - // clang-format on - [](LoongArchInstrInfo &II, MachineFunction &MF) { --- -2.20.1 - - -From 5f2a6174965bccaeefdeb410cf67ea0cb378b26c Mon Sep 17 00:00:00 2001 +From 3d3161280313c38aad695bc96a27bdba5a804e53 Mon Sep 17 00:00:00 2001 From: hev Date: Wed, 11 Oct 2023 18:28:04 +0800 Subject: [PATCH 2/7] [LoongArch] Add some atomic tests (#68766) (cherry picked from commit 37b93f07cd7ba2b1e6e81116cd49d34396b7b70a) + --- .../LoongArch/ir-instruction/atomicrmw-fp.ll | 2714 ++++++++++++- .../ir-instruction/atomicrmw-minmax.ll | 1400 +++++++ @@ -9460,2967 +8012,96 @@ index e91d0c145eab..deff11723d27 100644 +; LA32-LABEL: load_monotonic_i64: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 -+; LA32-NEXT: .cfi_def_cfa_offset 16 -+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -+; LA32-NEXT: .cfi_offset 1, -4 -+; LA32-NEXT: move $a1, $zero -+; LA32-NEXT: bl %plt(__atomic_load_8) -+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -+; LA32-NEXT: addi.w $sp, $sp, 16 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: load_monotonic_i64: -+; LA64: # %bb.0: -+; LA64-NEXT: ld.d $a0, $a0, 0 -+; LA64-NEXT: ret -+ %val = load atomic i64, ptr %ptr monotonic, align 8 -+ ret i64 %val -+} -+ -+define i8 @load_seq_cst_i8(ptr %ptr) { -+; LA32-LABEL: load_seq_cst_i8: -+; LA32: # %bb.0: -+; LA32-NEXT: ld.b $a0, $a0, 0 -+; LA32-NEXT: dbar 0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: load_seq_cst_i8: -+; LA64: # %bb.0: -+; LA64-NEXT: ld.b $a0, $a0, 0 -+; LA64-NEXT: dbar 0 -+; LA64-NEXT: ret -+ %val = load atomic i8, ptr %ptr seq_cst, align 1 -+ ret i8 %val -+} -+ -+define i16 @load_seq_cst_i16(ptr %ptr) { -+; LA32-LABEL: load_seq_cst_i16: -+; LA32: # %bb.0: -+; LA32-NEXT: ld.h $a0, $a0, 0 -+; LA32-NEXT: dbar 0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: 
load_seq_cst_i16: -+; LA64: # %bb.0: -+; LA64-NEXT: ld.h $a0, $a0, 0 -+; LA64-NEXT: dbar 0 -+; LA64-NEXT: ret -+ %val = load atomic i16, ptr %ptr seq_cst, align 2 -+ ret i16 %val -+} -+ -+define i32 @load_seq_cst_i32(ptr %ptr) { -+; LA32-LABEL: load_seq_cst_i32: -+; LA32: # %bb.0: -+; LA32-NEXT: ld.w $a0, $a0, 0 -+; LA32-NEXT: dbar 0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: load_seq_cst_i32: -+; LA64: # %bb.0: -+; LA64-NEXT: ld.w $a0, $a0, 0 -+; LA64-NEXT: dbar 0 -+; LA64-NEXT: ret -+ %val = load atomic i32, ptr %ptr seq_cst, align 4 -+ ret i32 %val -+} -+ -+define i64 @load_seq_cst_i64(ptr %ptr) { -+; LA32-LABEL: load_seq_cst_i64: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $sp, $sp, -16 -+; LA32-NEXT: .cfi_def_cfa_offset 16 -+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -+; LA32-NEXT: .cfi_offset 1, -4 -+; LA32-NEXT: ori $a1, $zero, 5 -+; LA32-NEXT: bl %plt(__atomic_load_8) -+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -+; LA32-NEXT: addi.w $sp, $sp, 16 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: load_seq_cst_i64: -+; LA64: # %bb.0: -+; LA64-NEXT: ld.d $a0, $a0, 0 -+; LA64-NEXT: dbar 0 -+; LA64-NEXT: ret -+ %val = load atomic i64, ptr %ptr seq_cst, align 8 -+ ret i64 %val -+} -+ - define void @store_release_i8(ptr %ptr, i8 signext %v) { - ; LA32-LABEL: store_release_i8: - ; LA32: # %bb.0: --- -2.20.1 - - -From 0f189600f07f701d96940c2cc52ca762d2be9104 Mon Sep 17 00:00:00 2001 -From: WANG Xuerui -Date: Wed, 11 Oct 2023 10:39:13 +0800 -Subject: [PATCH 3/7] [LoongArch] Support finer-grained DBAR hints for LA664+ - (#68787) - -These are treated as DBAR 0 on older uarchs, so we can start to -unconditionally emit the new hints right away. - -Co-authored-by: WANG Rui -(cherry picked from commit 956482de13107b640cffedd08610fcccd98f708f) ---- - .../LoongArchExpandAtomicPseudoInsts.cpp | 4 +- - .../LoongArch/LoongArchISelLowering.cpp | 20 +++++++ - .../Target/LoongArch/LoongArchISelLowering.h | 1 + - .../Target/LoongArch/LoongArchInstrInfo.td | 24 +++++++- - .../LoongArch/atomicrmw-uinc-udec-wrap.ll | 16 ++--- - .../ir-instruction/atomic-cmpxchg.ll | 24 ++++---- - .../LoongArch/ir-instruction/atomicrmw-fp.ll | 48 +++++++-------- - .../ir-instruction/fence-singlethread.ll | 4 +- - .../CodeGen/LoongArch/ir-instruction/fence.ll | 16 ++--- - .../ir-instruction/load-store-atomic.ll | 58 +++++++++---------- - 10 files changed, 129 insertions(+), 86 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp -index eb78ef065b21..b348cb56c136 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp -@@ -579,8 +579,8 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg( - case AtomicOrdering::Acquire: - case AtomicOrdering::AcquireRelease: - case AtomicOrdering::SequentiallyConsistent: -- // TODO: acquire -- hint = 0; -+ // acquire -+ hint = 0b10100; - break; - default: - hint = 0x700; -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index 5affaf37ad5a..33a3197013cc 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -159,6 +159,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - // The MULO libcall is not part of libgcc, only compiler-rt. 
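
[Editorial note, not part of the patch] As a quick reference for the LoongArchExpandAtomicPseudoInsts.cpp hunk above: acquire-class failure orderings now select the precise acquire hint 0b10100 (printed as dbar 20), while weaker orderings keep the 0x700 hint, which is what the dbar 1792 lines in the cmpxchg tests above correspond to. The following standalone C++ sketch only mirrors that selection for illustration; the enum and function names here are invented for the example and do not come from the patch.

    #include <cstdint>
    #include <cstdio>

    enum class Ordering { Monotonic, Acquire, Release, AcquireRelease,
                          SequentiallyConsistent };

    // Mirrors the switch changed in the hunk above: choose the DBAR hint that
    // the expanded cmpxchg emits on its failure path.
    static uint32_t cmpxchgFailureHint(Ordering Failure) {
      switch (Failure) {
      case Ordering::Acquire:
      case Ordering::AcquireRelease:
      case Ordering::SequentiallyConsistent:
        return 0b10100; // acquire hint, prints as "dbar 20"
      default:
        return 0x700;   // same-address read-after-read hint, "dbar 1792"
      }
    }

    int main() {
      std::printf("acquire failure: %u, monotonic failure: %u\n",
                  cmpxchgFailureHint(Ordering::Acquire),
                  cmpxchgFailureHint(Ordering::Monotonic));
      return 0;
    }
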
- setLibcallName(RTLIB::MULO_I128, nullptr); - -+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); -+ - static const ISD::CondCode FPCCToExpand[] = { - ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE, - ISD::SETGE, ISD::SETNE, ISD::SETGT}; -@@ -366,6 +368,8 @@ bool LoongArchTargetLowering::isOffsetFoldingLegal( - SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, - SelectionDAG &DAG) const { - switch (Op.getOpcode()) { -+ case ISD::ATOMIC_FENCE: -+ return lowerATOMIC_FENCE(Op, DAG); - case ISD::EH_DWARF_CFA: - return lowerEH_DWARF_CFA(Op, DAG); - case ISD::GlobalAddress: -@@ -542,6 +546,22 @@ LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, - return SDValue(); - } - -+SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op, -+ SelectionDAG &DAG) const { -+ SDLoc DL(Op); -+ SyncScope::ID FenceSSID = -+ static_cast(Op.getConstantOperandVal(2)); -+ -+ // singlethread fences only synchronize with signal handlers on the same -+ // thread and thus only need to preserve instruction order, not actually -+ // enforce memory ordering. -+ if (FenceSSID == SyncScope::SingleThread) -+ // MEMBARRIER is a compiler barrier; it codegens to a no-op. -+ return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); -+ -+ return Op; -+} -+ - SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op, - SelectionDAG &DAG) const { - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -index 6b5a851ec55d..23b90640a690 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -@@ -266,6 +266,7 @@ private: - MachineBasicBlock * - EmitInstrWithCustomInserter(MachineInstr &MI, - MachineBasicBlock *BB) const override; -+ SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; -diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -index a9b0db30c2f6..fcbd314507a5 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -@@ -1590,7 +1590,29 @@ def : RegRegStPat; - - /// Atomic loads and stores - --def : Pat<(atomic_fence timm, timm), (DBAR 0)>; -+// DBAR hint encoding for LA664 and later micro-architectures, paraphrased from -+// the Linux patch revealing it [1]: -+// -+// - Bit 4: kind of constraint (0: completion, 1: ordering) -+// - Bit 3: barrier for previous read (0: true, 1: false) -+// - Bit 2: barrier for previous write (0: true, 1: false) -+// - Bit 1: barrier for succeeding read (0: true, 1: false) -+// - Bit 0: barrier for succeeding write (0: true, 1: false) -+// -+// Hint 0x700: barrier for "read after read" from the same address, which is -+// e.g. needed by LL-SC loops on older models. (DBAR 0x700 behaves the same as -+// nop if such reordering is disabled on supporting newer models.) -+// -+// [1]: https://lore.kernel.org/loongarch/20230516124536.535343-1-chenhuacai@loongson.cn/ -+// -+// Implementations without support for the finer-granularity hints simply treat -+// all as the full barrier (DBAR 0), so we can unconditionally start emiting the -+// more precise hints right away. 
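
[Editorial note, not part of the patch] To make the hint encoding described in the comment above concrete, the standalone C++ sketch below rebuilds the values used by the new atomic_fence patterns that follow: 0b10100 (dbar 20) for acquire, 0b10010 (dbar 18) for release, and 0b10000 (dbar 16) for acq_rel and seq_cst, matching the updated fence.ll and load-store-atomic.ll checks later in this patch. The constant names are invented for the example.

    #include <cstdint>
    #include <cstdio>

    // Bit 4 selects an ordering (1) rather than completion (0) constraint.
    // Bits 3..0 are set when NO barrier is needed for previous reads,
    // previous writes, succeeding reads and succeeding writes, respectively
    // (a clear bit means the barrier applies to that category).
    constexpr uint32_t kOrdering    = 1u << 4;
    constexpr uint32_t kNoPrevRead  = 1u << 3;
    constexpr uint32_t kNoPrevWrite = 1u << 2;
    constexpr uint32_t kNoSuccRead  = 1u << 1;
    constexpr uint32_t kNoSuccWrite = 1u << 0;

    // acquire: earlier reads complete before any later access; earlier writes
    // are unconstrained.
    constexpr uint32_t kAcquireHint = kOrdering | kNoPrevWrite; // 0b10100 == 20
    // release: all earlier accesses complete before later writes; later reads
    // are unconstrained.
    constexpr uint32_t kReleaseHint = kOrdering | kNoSuccRead;  // 0b10010 == 18
    // acq_rel / seq_cst: barrier in every direction.
    constexpr uint32_t kAcqRelHint  = kOrdering;                // 0b10000 == 16

    int main() {
      std::printf("acquire=%u release=%u acqrel/seqcst=%u\n",
                  kAcquireHint, kReleaseHint, kAcqRelHint);
      return 0;
    }

As the comment above notes, micro-architectures without support for the finer-grained hints treat all of these as the full DBAR 0 barrier, which is why the patterns can be emitted unconditionally.
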
-+ -+def : Pat<(atomic_fence 4, timm), (DBAR 0b10100)>; // acquire -+def : Pat<(atomic_fence 5, timm), (DBAR 0b10010)>; // release -+def : Pat<(atomic_fence 6, timm), (DBAR 0b10000)>; // acqrel -+def : Pat<(atomic_fence 7, timm), (DBAR 0b10000)>; // seqcst - - defm : LdPat; - defm : LdPat; -diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll -index 32106886c783..d8908acbc945 100644 ---- a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll -+++ b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll -@@ -40,7 +40,7 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { - ; LA64-NEXT: b .LBB0_6 - ; LA64-NEXT: .LBB0_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB0_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1 - ; LA64-NEXT: addi.w $a6, $a3, 0 -@@ -93,7 +93,7 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { - ; LA64-NEXT: b .LBB1_6 - ; LA64-NEXT: .LBB1_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB1_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1 - ; LA64-NEXT: addi.w $a6, $a3, 0 -@@ -133,7 +133,7 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { - ; LA64-NEXT: b .LBB2_6 - ; LA64-NEXT: .LBB2_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB2_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1 - ; LA64-NEXT: move $a3, $a1 -@@ -171,7 +171,7 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { - ; LA64-NEXT: b .LBB3_6 - ; LA64-NEXT: .LBB3_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB3_1 Depth=1 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB3_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB3_1 Depth=1 - ; LA64-NEXT: bne $a2, $a3, .LBB3_1 -@@ -226,7 +226,7 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { - ; LA64-NEXT: b .LBB4_6 - ; LA64-NEXT: .LBB4_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB4_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1 - ; LA64-NEXT: addi.w $a7, $a3, 0 -@@ -284,7 +284,7 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { - ; LA64-NEXT: b .LBB5_6 - ; LA64-NEXT: .LBB5_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB5_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1 - ; LA64-NEXT: addi.w $a7, $a3, 0 -@@ -329,7 +329,7 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { - ; LA64-NEXT: b .LBB6_6 - ; LA64-NEXT: .LBB6_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB6_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 - ; LA64-NEXT: move $a4, $a2 -@@ -372,7 +372,7 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { - ; LA64-NEXT: b .LBB7_6 - ; LA64-NEXT: .LBB7_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB7_1 Depth=1 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB7_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB7_1 Depth=1 - ; LA64-NEXT: bne $a2, $a3, .LBB7_1 -diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll 
b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll -index 1ac20d10e587..4f25a1d69af1 100644 ---- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll -+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll -@@ -27,7 +27,7 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { - ; LA64-NEXT: beqz $a5, .LBB0_1 - ; LA64-NEXT: b .LBB0_4 - ; LA64-NEXT: .LBB0_3: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB0_4: - ; LA64-NEXT: ret - %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire -@@ -61,7 +61,7 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind - ; LA64-NEXT: beqz $a5, .LBB1_1 - ; LA64-NEXT: b .LBB1_4 - ; LA64-NEXT: .LBB1_3: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB1_4: - ; LA64-NEXT: ret - %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire -@@ -80,7 +80,7 @@ define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind - ; LA64-NEXT: beqz $a4, .LBB2_1 - ; LA64-NEXT: b .LBB2_4 - ; LA64-NEXT: .LBB2_3: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB2_4: - ; LA64-NEXT: ret - %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire -@@ -99,7 +99,7 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind - ; LA64-NEXT: beqz $a4, .LBB3_1 - ; LA64-NEXT: b .LBB3_4 - ; LA64-NEXT: .LBB3_3: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB3_4: - ; LA64-NEXT: ret - %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire -@@ -132,7 +132,7 @@ define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind - ; LA64-NEXT: beqz $a6, .LBB4_1 - ; LA64-NEXT: b .LBB4_4 - ; LA64-NEXT: .LBB4_3: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB4_4: - ; LA64-NEXT: srl.w $a0, $a5, $a0 - ; LA64-NEXT: ret -@@ -168,7 +168,7 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou - ; LA64-NEXT: beqz $a6, .LBB5_1 - ; LA64-NEXT: b .LBB5_4 - ; LA64-NEXT: .LBB5_3: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB5_4: - ; LA64-NEXT: srl.w $a0, $a5, $a0 - ; LA64-NEXT: ret -@@ -189,7 +189,7 @@ define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nou - ; LA64-NEXT: beqz $a4, .LBB6_1 - ; LA64-NEXT: b .LBB6_4 - ; LA64-NEXT: .LBB6_3: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB6_4: - ; LA64-NEXT: move $a0, $a3 - ; LA64-NEXT: ret -@@ -210,7 +210,7 @@ define i64 @cmpxchg_i64_acquire_acquire_reti64(ptr %ptr, i64 %cmp, i64 %val) nou - ; LA64-NEXT: beqz $a4, .LBB7_1 - ; LA64-NEXT: b .LBB7_4 - ; LA64-NEXT: .LBB7_3: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB7_4: - ; LA64-NEXT: move $a0, $a3 - ; LA64-NEXT: ret -@@ -245,7 +245,7 @@ define i1 @cmpxchg_i8_acquire_acquire_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind - ; LA64-NEXT: beqz $a6, .LBB8_1 - ; LA64-NEXT: b .LBB8_4 - ; LA64-NEXT: .LBB8_3: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB8_4: - ; LA64-NEXT: and $a0, $a5, $a4 - ; LA64-NEXT: addi.w $a0, $a0, 0 -@@ -284,7 +284,7 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw - ; LA64-NEXT: beqz $a6, .LBB9_1 - ; LA64-NEXT: b .LBB9_4 - ; LA64-NEXT: .LBB9_3: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB9_4: - ; LA64-NEXT: and $a0, $a5, $a4 - ; LA64-NEXT: addi.w $a0, $a0, 0 -@@ -308,7 +308,7 @@ define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounw - ; LA64-NEXT: beqz $a4, .LBB10_1 - 
; LA64-NEXT: b .LBB10_4 - ; LA64-NEXT: .LBB10_3: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB10_4: - ; LA64-NEXT: addi.w $a0, $a1, 0 - ; LA64-NEXT: xor $a0, $a3, $a0 -@@ -331,7 +331,7 @@ define i1 @cmpxchg_i64_acquire_acquire_reti1(ptr %ptr, i64 %cmp, i64 %val) nounw - ; LA64-NEXT: beqz $a4, .LBB11_1 - ; LA64-NEXT: b .LBB11_4 - ; LA64-NEXT: .LBB11_3: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB11_4: - ; LA64-NEXT: xor $a0, $a3, $a1 - ; LA64-NEXT: sltui $a0, $a0, 1 -diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll -index 02d481cb3865..589360823b14 100644 ---- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll -+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll -@@ -29,7 +29,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { - ; LA64F-NEXT: b .LBB0_6 - ; LA64F-NEXT: .LBB0_5: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1 --; LA64F-NEXT: dbar 0 -+; LA64F-NEXT: dbar 20 - ; LA64F-NEXT: .LBB0_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -@@ -64,7 +64,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { - ; LA64D-NEXT: b .LBB0_6 - ; LA64D-NEXT: .LBB0_5: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1 --; LA64D-NEXT: dbar 0 -+; LA64D-NEXT: dbar 20 - ; LA64D-NEXT: .LBB0_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -@@ -103,7 +103,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { - ; LA64F-NEXT: b .LBB1_6 - ; LA64F-NEXT: .LBB1_5: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB1_1 Depth=1 --; LA64F-NEXT: dbar 0 -+; LA64F-NEXT: dbar 20 - ; LA64F-NEXT: .LBB1_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB1_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -@@ -138,7 +138,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { - ; LA64D-NEXT: b .LBB1_6 - ; LA64D-NEXT: .LBB1_5: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1 --; LA64D-NEXT: dbar 0 -+; LA64D-NEXT: dbar 20 - ; LA64D-NEXT: .LBB1_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -@@ -178,7 +178,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { - ; LA64F-NEXT: b .LBB2_6 - ; LA64F-NEXT: .LBB2_5: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB2_1 Depth=1 --; LA64F-NEXT: dbar 0 -+; LA64F-NEXT: dbar 20 - ; LA64F-NEXT: .LBB2_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB2_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -@@ -214,7 +214,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { - ; LA64D-NEXT: b .LBB2_6 - ; LA64D-NEXT: .LBB2_5: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB2_1 Depth=1 --; LA64D-NEXT: dbar 0 -+; LA64D-NEXT: dbar 20 - ; LA64D-NEXT: .LBB2_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB2_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -@@ -254,7 +254,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { - ; LA64F-NEXT: b .LBB3_6 - ; LA64F-NEXT: .LBB3_5: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB3_1 Depth=1 --; LA64F-NEXT: dbar 0 -+; LA64F-NEXT: dbar 20 - ; LA64F-NEXT: .LBB3_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB3_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -@@ -290,7 +290,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { - ; LA64D-NEXT: b .LBB3_6 - ; LA64D-NEXT: .LBB3_5: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: 
Header=BB3_1 Depth=1 --; LA64D-NEXT: dbar 0 -+; LA64D-NEXT: dbar 20 - ; LA64D-NEXT: .LBB3_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB3_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -@@ -1385,7 +1385,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { - ; LA64F-NEXT: b .LBB16_6 - ; LA64F-NEXT: .LBB16_5: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB16_1 Depth=1 --; LA64F-NEXT: dbar 0 -+; LA64F-NEXT: dbar 20 - ; LA64F-NEXT: .LBB16_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB16_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -@@ -1420,7 +1420,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { - ; LA64D-NEXT: b .LBB16_6 - ; LA64D-NEXT: .LBB16_5: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB16_1 Depth=1 --; LA64D-NEXT: dbar 0 -+; LA64D-NEXT: dbar 20 - ; LA64D-NEXT: .LBB16_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB16_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -@@ -1459,7 +1459,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { - ; LA64F-NEXT: b .LBB17_6 - ; LA64F-NEXT: .LBB17_5: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB17_1 Depth=1 --; LA64F-NEXT: dbar 0 -+; LA64F-NEXT: dbar 20 - ; LA64F-NEXT: .LBB17_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB17_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -@@ -1494,7 +1494,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { - ; LA64D-NEXT: b .LBB17_6 - ; LA64D-NEXT: .LBB17_5: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB17_1 Depth=1 --; LA64D-NEXT: dbar 0 -+; LA64D-NEXT: dbar 20 - ; LA64D-NEXT: .LBB17_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB17_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -@@ -1534,7 +1534,7 @@ define float @float_fmin_acq_rel(ptr %p) nounwind { - ; LA64F-NEXT: b .LBB18_6 - ; LA64F-NEXT: .LBB18_5: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB18_1 Depth=1 --; LA64F-NEXT: dbar 0 -+; LA64F-NEXT: dbar 20 - ; LA64F-NEXT: .LBB18_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB18_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -@@ -1570,7 +1570,7 @@ define float @float_fmin_acq_rel(ptr %p) nounwind { - ; LA64D-NEXT: b .LBB18_6 - ; LA64D-NEXT: .LBB18_5: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB18_1 Depth=1 --; LA64D-NEXT: dbar 0 -+; LA64D-NEXT: dbar 20 - ; LA64D-NEXT: .LBB18_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB18_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -@@ -1610,7 +1610,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { - ; LA64F-NEXT: b .LBB19_6 - ; LA64F-NEXT: .LBB19_5: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB19_1 Depth=1 --; LA64F-NEXT: dbar 0 -+; LA64F-NEXT: dbar 20 - ; LA64F-NEXT: .LBB19_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB19_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -@@ -1646,7 +1646,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { - ; LA64D-NEXT: b .LBB19_6 - ; LA64D-NEXT: .LBB19_5: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB19_1 Depth=1 --; LA64D-NEXT: dbar 0 -+; LA64D-NEXT: dbar 20 - ; LA64D-NEXT: .LBB19_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB19_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -@@ -2087,7 +2087,7 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { - ; LA64F-NEXT: b .LBB24_6 - ; LA64F-NEXT: .LBB24_5: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB24_1 Depth=1 --; LA64F-NEXT: dbar 0 -+; LA64F-NEXT: dbar 20 - ; LA64F-NEXT: .LBB24_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB24_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w 
$fa0, $a3 -@@ -2122,7 +2122,7 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { - ; LA64D-NEXT: b .LBB24_6 - ; LA64D-NEXT: .LBB24_5: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB24_1 Depth=1 --; LA64D-NEXT: dbar 0 -+; LA64D-NEXT: dbar 20 - ; LA64D-NEXT: .LBB24_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB24_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -@@ -2161,7 +2161,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { - ; LA64F-NEXT: b .LBB25_6 - ; LA64F-NEXT: .LBB25_5: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB25_1 Depth=1 --; LA64F-NEXT: dbar 0 -+; LA64F-NEXT: dbar 20 - ; LA64F-NEXT: .LBB25_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB25_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -@@ -2196,7 +2196,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { - ; LA64D-NEXT: b .LBB25_6 - ; LA64D-NEXT: .LBB25_5: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB25_1 Depth=1 --; LA64D-NEXT: dbar 0 -+; LA64D-NEXT: dbar 20 - ; LA64D-NEXT: .LBB25_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB25_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -@@ -2236,7 +2236,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { - ; LA64F-NEXT: b .LBB26_6 - ; LA64F-NEXT: .LBB26_5: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB26_1 Depth=1 --; LA64F-NEXT: dbar 0 -+; LA64F-NEXT: dbar 20 - ; LA64F-NEXT: .LBB26_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB26_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -@@ -2272,7 +2272,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { - ; LA64D-NEXT: b .LBB26_6 - ; LA64D-NEXT: .LBB26_5: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB26_1 Depth=1 --; LA64D-NEXT: dbar 0 -+; LA64D-NEXT: dbar 20 - ; LA64D-NEXT: .LBB26_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB26_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -@@ -2312,7 +2312,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { - ; LA64F-NEXT: b .LBB27_6 - ; LA64F-NEXT: .LBB27_5: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB27_1 Depth=1 --; LA64F-NEXT: dbar 0 -+; LA64F-NEXT: dbar 20 - ; LA64F-NEXT: .LBB27_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB27_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -@@ -2348,7 +2348,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { - ; LA64D-NEXT: b .LBB27_6 - ; LA64D-NEXT: .LBB27_5: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB27_1 Depth=1 --; LA64D-NEXT: dbar 0 -+; LA64D-NEXT: dbar 20 - ; LA64D-NEXT: .LBB27_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB27_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll -index 8d6056bc7677..a8b164a4cd3c 100644 ---- a/llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll -+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll -@@ -5,12 +5,12 @@ - define void @fence_singlethread() { - ; LA32-LABEL: fence_singlethread: - ; LA32: # %bb.0: --; LA32-NEXT: dbar 0 -+; LA32-NEXT: #MEMBARRIER - ; LA32-NEXT: ret - ; - ; LA64-LABEL: fence_singlethread: - ; LA64: # %bb.0: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: #MEMBARRIER - ; LA64-NEXT: ret - fence syncscope("singlethread") seq_cst - ret void -diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll -index 724639f3c6fb..c5b2232f9b80 100644 ---- a/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll -+++ 
b/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll -@@ -5,12 +5,12 @@ - define void @fence_acquire() nounwind { - ; LA32-LABEL: fence_acquire: - ; LA32: # %bb.0: --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 20 - ; LA32-NEXT: ret - ; - ; LA64-LABEL: fence_acquire: - ; LA64: # %bb.0: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: ret - fence acquire - ret void -@@ -19,12 +19,12 @@ define void @fence_acquire() nounwind { - define void @fence_release() nounwind { - ; LA32-LABEL: fence_release: - ; LA32: # %bb.0: --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 18 - ; LA32-NEXT: ret - ; - ; LA64-LABEL: fence_release: - ; LA64: # %bb.0: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 18 - ; LA64-NEXT: ret - fence release - ret void -@@ -33,12 +33,12 @@ define void @fence_release() nounwind { - define void @fence_acq_rel() nounwind { - ; LA32-LABEL: fence_acq_rel: - ; LA32: # %bb.0: --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 16 - ; LA32-NEXT: ret - ; - ; LA64-LABEL: fence_acq_rel: - ; LA64: # %bb.0: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 16 - ; LA64-NEXT: ret - fence acq_rel - ret void -@@ -47,12 +47,12 @@ define void @fence_acq_rel() nounwind { - define void @fence_seq_cst() nounwind { - ; LA32-LABEL: fence_seq_cst: - ; LA32: # %bb.0: --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 16 - ; LA32-NEXT: ret - ; - ; LA64-LABEL: fence_seq_cst: - ; LA64: # %bb.0: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 16 - ; LA64-NEXT: ret - fence seq_cst - ret void -diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll -index deff11723d27..8b170c479eed 100644 ---- a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll -+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll -@@ -6,13 +6,13 @@ define i8 @load_acquire_i8(ptr %ptr) { - ; LA32-LABEL: load_acquire_i8: - ; LA32: # %bb.0: - ; LA32-NEXT: ld.b $a0, $a0, 0 --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 20 - ; LA32-NEXT: ret - ; - ; LA64-LABEL: load_acquire_i8: - ; LA64: # %bb.0: - ; LA64-NEXT: ld.b $a0, $a0, 0 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: ret - %val = load atomic i8, ptr %ptr acquire, align 1 - ret i8 %val -@@ -22,13 +22,13 @@ define i16 @load_acquire_i16(ptr %ptr) { - ; LA32-LABEL: load_acquire_i16: - ; LA32: # %bb.0: - ; LA32-NEXT: ld.h $a0, $a0, 0 --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 20 - ; LA32-NEXT: ret - ; - ; LA64-LABEL: load_acquire_i16: - ; LA64: # %bb.0: - ; LA64-NEXT: ld.h $a0, $a0, 0 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: ret - %val = load atomic i16, ptr %ptr acquire, align 2 - ret i16 %val -@@ -38,13 +38,13 @@ define i32 @load_acquire_i32(ptr %ptr) { - ; LA32-LABEL: load_acquire_i32: - ; LA32: # %bb.0: - ; LA32-NEXT: ld.w $a0, $a0, 0 --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 20 - ; LA32-NEXT: ret - ; - ; LA64-LABEL: load_acquire_i32: - ; LA64: # %bb.0: - ; LA64-NEXT: ld.w $a0, $a0, 0 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: ret - %val = load atomic i32, ptr %ptr acquire, align 4 - ret i32 %val -@@ -66,7 +66,7 @@ define i64 @load_acquire_i64(ptr %ptr) { - ; LA64-LABEL: load_acquire_i64: - ; LA64: # %bb.0: - ; LA64-NEXT: ld.d $a0, $a0, 0 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: ret - %val = load atomic i64, ptr %ptr acquire, align 8 - ret i64 %val -@@ -202,13 +202,13 @@ define i8 @load_seq_cst_i8(ptr %ptr) { - ; LA32-LABEL: load_seq_cst_i8: - ; LA32: # %bb.0: - ; LA32-NEXT: ld.b $a0, $a0, 0 --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 16 - ; LA32-NEXT: 
ret - ; - ; LA64-LABEL: load_seq_cst_i8: - ; LA64: # %bb.0: - ; LA64-NEXT: ld.b $a0, $a0, 0 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 16 - ; LA64-NEXT: ret - %val = load atomic i8, ptr %ptr seq_cst, align 1 - ret i8 %val -@@ -218,13 +218,13 @@ define i16 @load_seq_cst_i16(ptr %ptr) { - ; LA32-LABEL: load_seq_cst_i16: - ; LA32: # %bb.0: - ; LA32-NEXT: ld.h $a0, $a0, 0 --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 16 - ; LA32-NEXT: ret - ; - ; LA64-LABEL: load_seq_cst_i16: - ; LA64: # %bb.0: - ; LA64-NEXT: ld.h $a0, $a0, 0 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 16 - ; LA64-NEXT: ret - %val = load atomic i16, ptr %ptr seq_cst, align 2 - ret i16 %val -@@ -234,13 +234,13 @@ define i32 @load_seq_cst_i32(ptr %ptr) { - ; LA32-LABEL: load_seq_cst_i32: - ; LA32: # %bb.0: - ; LA32-NEXT: ld.w $a0, $a0, 0 --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 16 - ; LA32-NEXT: ret - ; - ; LA64-LABEL: load_seq_cst_i32: - ; LA64: # %bb.0: - ; LA64-NEXT: ld.w $a0, $a0, 0 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 16 - ; LA64-NEXT: ret - %val = load atomic i32, ptr %ptr seq_cst, align 4 - ret i32 %val -@@ -262,7 +262,7 @@ define i64 @load_seq_cst_i64(ptr %ptr) { - ; LA64-LABEL: load_seq_cst_i64: - ; LA64: # %bb.0: - ; LA64-NEXT: ld.d $a0, $a0, 0 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 16 - ; LA64-NEXT: ret - %val = load atomic i64, ptr %ptr seq_cst, align 8 - ret i64 %val -@@ -271,13 +271,13 @@ define i64 @load_seq_cst_i64(ptr %ptr) { - define void @store_release_i8(ptr %ptr, i8 signext %v) { - ; LA32-LABEL: store_release_i8: - ; LA32: # %bb.0: --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 18 - ; LA32-NEXT: st.b $a1, $a0, 0 - ; LA32-NEXT: ret - ; - ; LA64-LABEL: store_release_i8: - ; LA64: # %bb.0: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 18 - ; LA64-NEXT: st.b $a1, $a0, 0 - ; LA64-NEXT: ret - store atomic i8 %v, ptr %ptr release, align 1 -@@ -287,13 +287,13 @@ define void @store_release_i8(ptr %ptr, i8 signext %v) { - define void @store_release_i16(ptr %ptr, i16 signext %v) { - ; LA32-LABEL: store_release_i16: - ; LA32: # %bb.0: --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 18 - ; LA32-NEXT: st.h $a1, $a0, 0 - ; LA32-NEXT: ret - ; - ; LA64-LABEL: store_release_i16: - ; LA64: # %bb.0: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 18 - ; LA64-NEXT: st.h $a1, $a0, 0 - ; LA64-NEXT: ret - store atomic i16 %v, ptr %ptr release, align 2 -@@ -303,7 +303,7 @@ define void @store_release_i16(ptr %ptr, i16 signext %v) { - define void @store_release_i32(ptr %ptr, i32 signext %v) { - ; LA32-LABEL: store_release_i32: - ; LA32: # %bb.0: --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 18 - ; LA32-NEXT: st.w $a1, $a0, 0 - ; LA32-NEXT: ret - ; -@@ -465,16 +465,16 @@ define void @store_monotonic_i64(ptr %ptr, i64 %v) { - define void @store_seq_cst_i8(ptr %ptr, i8 signext %v) { - ; LA32-LABEL: store_seq_cst_i8: - ; LA32: # %bb.0: --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 16 - ; LA32-NEXT: st.b $a1, $a0, 0 --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 16 - ; LA32-NEXT: ret - ; - ; LA64-LABEL: store_seq_cst_i8: - ; LA64: # %bb.0: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 16 - ; LA64-NEXT: st.b $a1, $a0, 0 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 16 - ; LA64-NEXT: ret - store atomic i8 %v, ptr %ptr seq_cst, align 1 - ret void -@@ -483,16 +483,16 @@ define void @store_seq_cst_i8(ptr %ptr, i8 signext %v) { - define void @store_seq_cst_i16(ptr %ptr, i16 signext %v) { - ; LA32-LABEL: store_seq_cst_i16: - ; LA32: # %bb.0: --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 16 - ; LA32-NEXT: st.h $a1, $a0, 0 --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 16 - ; LA32-NEXT: ret - ; - 
; LA64-LABEL: store_seq_cst_i16: - ; LA64: # %bb.0: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 16 - ; LA64-NEXT: st.h $a1, $a0, 0 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 16 - ; LA64-NEXT: ret - store atomic i16 %v, ptr %ptr seq_cst, align 2 - ret void -@@ -501,9 +501,9 @@ define void @store_seq_cst_i16(ptr %ptr, i16 signext %v) { - define void @store_seq_cst_i32(ptr %ptr, i32 signext %v) { - ; LA32-LABEL: store_seq_cst_i32: - ; LA32: # %bb.0: --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 16 - ; LA32-NEXT: st.w $a1, $a0, 0 --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 16 - ; LA32-NEXT: ret - ; - ; LA64-LABEL: store_seq_cst_i32: --- -2.20.1 - - -From 7e37560cddfa108426ff9f87871c71ed01e7596e Mon Sep 17 00:00:00 2001 -From: Weining Lu -Date: Tue, 17 Oct 2023 17:41:32 +0800 -Subject: [PATCH 4/7] [LoongArch] Precommit a test for atomic cmpxchg - optmization - -(cherry picked from commit b2773d170cb4bdb4b19ba801b5eb55395024b3ae) ---- - .../ir-instruction/atomic-cmpxchg.ll | 385 +++++++++++------- - 1 file changed, 245 insertions(+), 140 deletions(-) - -diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll -index 4f25a1d69af1..174bb9d0ff7d 100644 ---- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll -+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll -@@ -106,6 +106,111 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind - ret void - } - -+define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { -+; LA64-LABEL: cmpxchg_i8_acquire_monotonic: ++; LA32-NEXT: .cfi_def_cfa_offset 16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: .cfi_offset 1, -4 ++; LA32-NEXT: move $a1, $zero ++; LA32-NEXT: bl %plt(__atomic_load_8) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: load_monotonic_i64: +; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a3, $zero, -4 -+; LA64-NEXT: and $a3, $a0, $a3 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: andi $a2, $a2, 255 -+; LA64-NEXT: sll.w $a2, $a2, $a0 -+; LA64-NEXT: ori $a4, $zero, 255 -+; LA64-NEXT: sll.w $a0, $a4, $a0 -+; LA64-NEXT: addi.w $a0, $a0, 0 -+; LA64-NEXT: addi.w $a2, $a2, 0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a3, 0 -+; LA64-NEXT: and $a5, $a4, $a0 -+; LA64-NEXT: bne $a5, $a1, .LBB4_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 -+; LA64-NEXT: andn $a5, $a4, $a0 -+; LA64-NEXT: or $a5, $a5, $a2 -+; LA64-NEXT: sc.w $a5, $a3, 0 -+; LA64-NEXT: beqz $a5, .LBB4_1 -+; LA64-NEXT: b .LBB4_4 -+; LA64-NEXT: .LBB4_3: -+; LA64-NEXT: dbar 20 -+; LA64-NEXT: .LBB4_4: -+; LA64-NEXT: ret -+ %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire monotonic -+ ret void ++; LA64-NEXT: ld.d $a0, $a0, 0 ++; LA64-NEXT: ret ++ %val = load atomic i64, ptr %ptr monotonic, align 8 ++ ret i64 %val +} + -+define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwind { -+; LA64-LABEL: cmpxchg_i16_acquire_monotonic: ++define i8 @load_seq_cst_i8(ptr %ptr) { ++; LA32-LABEL: load_seq_cst_i8: ++; LA32: # %bb.0: ++; LA32-NEXT: ld.b $a0, $a0, 0 ++; LA32-NEXT: dbar 0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: load_seq_cst_i8: +; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a3, $zero, -4 -+; LA64-NEXT: and $a3, $a0, $a3 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: bstrpick.d $a1, 
$a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0 -+; LA64-NEXT: sll.w $a2, $a2, $a0 -+; LA64-NEXT: lu12i.w $a4, 15 -+; LA64-NEXT: ori $a4, $a4, 4095 -+; LA64-NEXT: sll.w $a0, $a4, $a0 -+; LA64-NEXT: addi.w $a0, $a0, 0 -+; LA64-NEXT: addi.w $a2, $a2, 0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a3, 0 -+; LA64-NEXT: and $a5, $a4, $a0 -+; LA64-NEXT: bne $a5, $a1, .LBB5_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 -+; LA64-NEXT: andn $a5, $a4, $a0 -+; LA64-NEXT: or $a5, $a5, $a2 -+; LA64-NEXT: sc.w $a5, $a3, 0 -+; LA64-NEXT: beqz $a5, .LBB5_1 -+; LA64-NEXT: b .LBB5_4 -+; LA64-NEXT: .LBB5_3: -+; LA64-NEXT: dbar 20 -+; LA64-NEXT: .LBB5_4: -+; LA64-NEXT: ret -+ %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire monotonic -+ ret void ++; LA64-NEXT: ld.b $a0, $a0, 0 ++; LA64-NEXT: dbar 0 ++; LA64-NEXT: ret ++ %val = load atomic i8, ptr %ptr seq_cst, align 1 ++ ret i8 %val +} + -+define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { -+; LA64-LABEL: cmpxchg_i32_acquire_monotonic: ++define i16 @load_seq_cst_i16(ptr %ptr) { ++; LA32-LABEL: load_seq_cst_i16: ++; LA32: # %bb.0: ++; LA32-NEXT: ld.h $a0, $a0, 0 ++; LA32-NEXT: dbar 0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: load_seq_cst_i16: +; LA64: # %bb.0: -+; LA64-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a3, $a0, 0 -+; LA64-NEXT: bne $a3, $a1, .LBB6_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 -+; LA64-NEXT: move $a4, $a2 -+; LA64-NEXT: sc.w $a4, $a0, 0 -+; LA64-NEXT: beqz $a4, .LBB6_1 -+; LA64-NEXT: b .LBB6_4 -+; LA64-NEXT: .LBB6_3: -+; LA64-NEXT: dbar 20 -+; LA64-NEXT: .LBB6_4: ++; LA64-NEXT: ld.h $a0, $a0, 0 ++; LA64-NEXT: dbar 0 +; LA64-NEXT: ret -+ %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire monotonic -+ ret void ++ %val = load atomic i16, ptr %ptr seq_cst, align 2 ++ ret i16 %val +} + -+define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind { -+; LA64-LABEL: cmpxchg_i64_acquire_monotonic: ++define i32 @load_seq_cst_i32(ptr %ptr) { ++; LA32-LABEL: load_seq_cst_i32: ++; LA32: # %bb.0: ++; LA32-NEXT: ld.w $a0, $a0, 0 ++; LA32-NEXT: dbar 0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: load_seq_cst_i32: +; LA64: # %bb.0: -+; LA64-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.d $a3, $a0, 0 -+; LA64-NEXT: bne $a3, $a1, .LBB7_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1 -+; LA64-NEXT: move $a4, $a2 -+; LA64-NEXT: sc.d $a4, $a0, 0 -+; LA64-NEXT: beqz $a4, .LBB7_1 -+; LA64-NEXT: b .LBB7_4 -+; LA64-NEXT: .LBB7_3: -+; LA64-NEXT: dbar 20 -+; LA64-NEXT: .LBB7_4: ++; LA64-NEXT: ld.w $a0, $a0, 0 ++; LA64-NEXT: dbar 0 +; LA64-NEXT: ret -+ %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire monotonic -+ ret void ++ %val = load atomic i32, ptr %ptr seq_cst, align 4 ++ ret i32 %val +} + - define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind { - ; LA64-LABEL: cmpxchg_i8_acquire_acquire_reti8: - ; LA64: # %bb.0: -@@ -121,19 +226,19 @@ define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind - ; LA64-NEXT: andi $a1, $a1, 255 - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 --; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a5, $a3, 0 - ; LA64-NEXT: and $a6, $a5, $a4 --; LA64-NEXT: bne $a6, $a1, .LBB4_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB4_1 
Depth=1 -+; LA64-NEXT: bne $a6, $a1, .LBB8_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 - ; LA64-NEXT: andn $a6, $a5, $a4 - ; LA64-NEXT: or $a6, $a6, $a2 - ; LA64-NEXT: sc.w $a6, $a3, 0 --; LA64-NEXT: beqz $a6, .LBB4_1 --; LA64-NEXT: b .LBB4_4 --; LA64-NEXT: .LBB4_3: -+; LA64-NEXT: beqz $a6, .LBB8_1 -+; LA64-NEXT: b .LBB8_4 -+; LA64-NEXT: .LBB8_3: - ; LA64-NEXT: dbar 20 --; LA64-NEXT: .LBB4_4: -+; LA64-NEXT: .LBB8_4: - ; LA64-NEXT: srl.w $a0, $a5, $a0 - ; LA64-NEXT: ret - %tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire -@@ -157,19 +262,19 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou - ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 --; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a5, $a3, 0 - ; LA64-NEXT: and $a6, $a5, $a4 --; LA64-NEXT: bne $a6, $a1, .LBB5_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 -+; LA64-NEXT: bne $a6, $a1, .LBB9_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 - ; LA64-NEXT: andn $a6, $a5, $a4 - ; LA64-NEXT: or $a6, $a6, $a2 - ; LA64-NEXT: sc.w $a6, $a3, 0 --; LA64-NEXT: beqz $a6, .LBB5_1 --; LA64-NEXT: b .LBB5_4 --; LA64-NEXT: .LBB5_3: -+; LA64-NEXT: beqz $a6, .LBB9_1 -+; LA64-NEXT: b .LBB9_4 -+; LA64-NEXT: .LBB9_3: - ; LA64-NEXT: dbar 20 --; LA64-NEXT: .LBB5_4: -+; LA64-NEXT: .LBB9_4: - ; LA64-NEXT: srl.w $a0, $a5, $a0 - ; LA64-NEXT: ret - %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire -@@ -180,17 +285,17 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou - define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind { - ; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti32: - ; LA64: # %bb.0: --; LA64-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a3, $a0, 0 --; LA64-NEXT: bne $a3, $a1, .LBB6_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 -+; LA64-NEXT: bne $a3, $a1, .LBB10_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.w $a4, $a0, 0 --; LA64-NEXT: beqz $a4, .LBB6_1 --; LA64-NEXT: b .LBB6_4 --; LA64-NEXT: .LBB6_3: -+; LA64-NEXT: beqz $a4, .LBB10_1 -+; LA64-NEXT: b .LBB10_4 -+; LA64-NEXT: .LBB10_3: - ; LA64-NEXT: dbar 20 --; LA64-NEXT: .LBB6_4: -+; LA64-NEXT: .LBB10_4: - ; LA64-NEXT: move $a0, $a3 - ; LA64-NEXT: ret - %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire -@@ -201,17 +306,17 @@ define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nou - define i64 @cmpxchg_i64_acquire_acquire_reti64(ptr %ptr, i64 %cmp, i64 %val) nounwind { - ; LA64-LABEL: cmpxchg_i64_acquire_acquire_reti64: - ; LA64: # %bb.0: --; LA64-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.d $a3, $a0, 0 --; LA64-NEXT: bne $a3, $a1, .LBB7_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1 -+; LA64-NEXT: bne $a3, $a1, .LBB11_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.d $a4, $a0, 0 --; LA64-NEXT: beqz $a4, .LBB7_1 --; LA64-NEXT: b .LBB7_4 --; LA64-NEXT: .LBB7_3: -+; LA64-NEXT: beqz $a4, .LBB11_1 -+; LA64-NEXT: b .LBB11_4 -+; LA64-NEXT: .LBB11_3: - ; LA64-NEXT: dbar 20 --; LA64-NEXT: .LBB7_4: -+; LA64-NEXT: .LBB11_4: - ; LA64-NEXT: move $a0, $a3 - ; LA64-NEXT: ret - 
%tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire -@@ -234,19 +339,19 @@ define i1 @cmpxchg_i8_acquire_acquire_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind - ; LA64-NEXT: addi.w $a0, $a0, 0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: addi.w $a2, $a4, 0 --; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a5, $a3, 0 - ; LA64-NEXT: and $a6, $a5, $a2 --; LA64-NEXT: bne $a6, $a1, .LBB8_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 -+; LA64-NEXT: bne $a6, $a1, .LBB12_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 - ; LA64-NEXT: andn $a6, $a5, $a2 - ; LA64-NEXT: or $a6, $a6, $a0 - ; LA64-NEXT: sc.w $a6, $a3, 0 --; LA64-NEXT: beqz $a6, .LBB8_1 --; LA64-NEXT: b .LBB8_4 --; LA64-NEXT: .LBB8_3: -+; LA64-NEXT: beqz $a6, .LBB12_1 -+; LA64-NEXT: b .LBB12_4 -+; LA64-NEXT: .LBB12_3: - ; LA64-NEXT: dbar 20 --; LA64-NEXT: .LBB8_4: -+; LA64-NEXT: .LBB12_4: - ; LA64-NEXT: and $a0, $a5, $a4 - ; LA64-NEXT: addi.w $a0, $a0, 0 - ; LA64-NEXT: xor $a0, $a1, $a0 -@@ -273,19 +378,19 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw - ; LA64-NEXT: addi.w $a0, $a0, 0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: addi.w $a2, $a4, 0 --; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a5, $a3, 0 - ; LA64-NEXT: and $a6, $a5, $a2 --; LA64-NEXT: bne $a6, $a1, .LBB9_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 -+; LA64-NEXT: bne $a6, $a1, .LBB13_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 - ; LA64-NEXT: andn $a6, $a5, $a2 - ; LA64-NEXT: or $a6, $a6, $a0 - ; LA64-NEXT: sc.w $a6, $a3, 0 --; LA64-NEXT: beqz $a6, .LBB9_1 --; LA64-NEXT: b .LBB9_4 --; LA64-NEXT: .LBB9_3: -+; LA64-NEXT: beqz $a6, .LBB13_1 -+; LA64-NEXT: b .LBB13_4 -+; LA64-NEXT: .LBB13_3: - ; LA64-NEXT: dbar 20 --; LA64-NEXT: .LBB9_4: -+; LA64-NEXT: .LBB13_4: - ; LA64-NEXT: and $a0, $a5, $a4 - ; LA64-NEXT: addi.w $a0, $a0, 0 - ; LA64-NEXT: xor $a0, $a1, $a0 -@@ -299,17 +404,17 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw - define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind { - ; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti1: - ; LA64: # %bb.0: --; LA64-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a3, $a0, 0 --; LA64-NEXT: bne $a3, $a1, .LBB10_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 -+; LA64-NEXT: bne $a3, $a1, .LBB14_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.w $a4, $a0, 0 --; LA64-NEXT: beqz $a4, .LBB10_1 --; LA64-NEXT: b .LBB10_4 --; LA64-NEXT: .LBB10_3: -+; LA64-NEXT: beqz $a4, .LBB14_1 -+; LA64-NEXT: b .LBB14_4 -+; LA64-NEXT: .LBB14_3: - ; LA64-NEXT: dbar 20 --; LA64-NEXT: .LBB10_4: -+; LA64-NEXT: .LBB14_4: - ; LA64-NEXT: addi.w $a0, $a1, 0 - ; LA64-NEXT: xor $a0, $a3, $a0 - ; LA64-NEXT: sltui $a0, $a0, 1 -@@ -322,17 +427,17 @@ define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounw - define i1 @cmpxchg_i64_acquire_acquire_reti1(ptr %ptr, i64 %cmp, i64 %val) nounwind { - ; LA64-LABEL: cmpxchg_i64_acquire_acquire_reti1: - ; LA64: # %bb.0: --; LA64-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.d $a3, $a0, 0 --; LA64-NEXT: bne $a3, $a1, 
.LBB11_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 -+; LA64-NEXT: bne $a3, $a1, .LBB15_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.d $a4, $a0, 0 --; LA64-NEXT: beqz $a4, .LBB11_1 --; LA64-NEXT: b .LBB11_4 --; LA64-NEXT: .LBB11_3: -+; LA64-NEXT: beqz $a4, .LBB15_1 -+; LA64-NEXT: b .LBB15_4 -+; LA64-NEXT: .LBB15_3: - ; LA64-NEXT: dbar 20 --; LA64-NEXT: .LBB11_4: -+; LA64-NEXT: .LBB15_4: - ; LA64-NEXT: xor $a0, $a3, $a1 - ; LA64-NEXT: sltui $a0, $a0, 1 - ; LA64-NEXT: ret -@@ -356,19 +461,19 @@ define void @cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind - ; LA64-NEXT: addi.w $a0, $a0, 0 - ; LA64-NEXT: addi.w $a2, $a2, 0 - ; LA64-NEXT: addi.w $a1, $a1, 0 --; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a4, $a3, 0 - ; LA64-NEXT: and $a5, $a4, $a0 --; LA64-NEXT: bne $a5, $a1, .LBB12_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 -+; LA64-NEXT: bne $a5, $a1, .LBB16_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 - ; LA64-NEXT: andn $a5, $a4, $a0 - ; LA64-NEXT: or $a5, $a5, $a2 - ; LA64-NEXT: sc.w $a5, $a3, 0 --; LA64-NEXT: beqz $a5, .LBB12_1 --; LA64-NEXT: b .LBB12_4 --; LA64-NEXT: .LBB12_3: -+; LA64-NEXT: beqz $a5, .LBB16_1 -+; LA64-NEXT: b .LBB16_4 -+; LA64-NEXT: .LBB16_3: - ; LA64-NEXT: dbar 1792 --; LA64-NEXT: .LBB12_4: -+; LA64-NEXT: .LBB16_4: - ; LA64-NEXT: ret - %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic - ret void -@@ -390,19 +495,19 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw - ; LA64-NEXT: addi.w $a0, $a0, 0 - ; LA64-NEXT: addi.w $a2, $a2, 0 - ; LA64-NEXT: addi.w $a1, $a1, 0 --; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a4, $a3, 0 - ; LA64-NEXT: and $a5, $a4, $a0 --; LA64-NEXT: bne $a5, $a1, .LBB13_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 -+; LA64-NEXT: bne $a5, $a1, .LBB17_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 - ; LA64-NEXT: andn $a5, $a4, $a0 - ; LA64-NEXT: or $a5, $a5, $a2 - ; LA64-NEXT: sc.w $a5, $a3, 0 --; LA64-NEXT: beqz $a5, .LBB13_1 --; LA64-NEXT: b .LBB13_4 --; LA64-NEXT: .LBB13_3: -+; LA64-NEXT: beqz $a5, .LBB17_1 -+; LA64-NEXT: b .LBB17_4 -+; LA64-NEXT: .LBB17_3: - ; LA64-NEXT: dbar 1792 --; LA64-NEXT: .LBB13_4: -+; LA64-NEXT: .LBB17_4: - ; LA64-NEXT: ret - %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic - ret void -@@ -411,17 +516,17 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw - define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { - ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic: - ; LA64: # %bb.0: --; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a3, $a0, 0 --; LA64-NEXT: bne $a3, $a1, .LBB14_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 -+; LA64-NEXT: bne $a3, $a1, .LBB18_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.w $a4, $a0, 0 --; LA64-NEXT: beqz $a4, .LBB14_1 --; LA64-NEXT: b .LBB14_4 --; LA64-NEXT: .LBB14_3: -+; LA64-NEXT: beqz $a4, .LBB18_1 -+; LA64-NEXT: b .LBB18_4 -+; LA64-NEXT: .LBB18_3: - ; LA64-NEXT: dbar 1792 --; LA64-NEXT: .LBB14_4: -+; LA64-NEXT: .LBB18_4: - ; LA64-NEXT: ret - %res = cmpxchg ptr %ptr, i32 
%cmp, i32 %val monotonic monotonic - ret void -@@ -430,17 +535,17 @@ define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounw - define void @cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind { - ; LA64-LABEL: cmpxchg_i64_monotonic_monotonic: - ; LA64: # %bb.0: --; LA64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.d $a3, $a0, 0 --; LA64-NEXT: bne $a3, $a1, .LBB15_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -+; LA64-NEXT: bne $a3, $a1, .LBB19_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.d $a4, $a0, 0 --; LA64-NEXT: beqz $a4, .LBB15_1 --; LA64-NEXT: b .LBB15_4 --; LA64-NEXT: .LBB15_3: -+; LA64-NEXT: beqz $a4, .LBB19_1 -+; LA64-NEXT: b .LBB19_4 -+; LA64-NEXT: .LBB19_3: - ; LA64-NEXT: dbar 1792 --; LA64-NEXT: .LBB15_4: -+; LA64-NEXT: .LBB19_4: - ; LA64-NEXT: ret - %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic - ret void -@@ -461,19 +566,19 @@ define i8 @cmpxchg_i8_monotonic_monotonic_reti8(ptr %ptr, i8 %cmp, i8 %val) noun - ; LA64-NEXT: andi $a1, $a1, 255 - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 --; LA64-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a5, $a3, 0 - ; LA64-NEXT: and $a6, $a5, $a4 --; LA64-NEXT: bne $a6, $a1, .LBB16_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 -+; LA64-NEXT: bne $a6, $a1, .LBB20_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 - ; LA64-NEXT: andn $a6, $a5, $a4 - ; LA64-NEXT: or $a6, $a6, $a2 - ; LA64-NEXT: sc.w $a6, $a3, 0 --; LA64-NEXT: beqz $a6, .LBB16_1 --; LA64-NEXT: b .LBB16_4 --; LA64-NEXT: .LBB16_3: -+; LA64-NEXT: beqz $a6, .LBB20_1 -+; LA64-NEXT: b .LBB20_4 -+; LA64-NEXT: .LBB20_3: - ; LA64-NEXT: dbar 1792 --; LA64-NEXT: .LBB16_4: -+; LA64-NEXT: .LBB20_4: - ; LA64-NEXT: srl.w $a0, $a5, $a0 - ; LA64-NEXT: ret - %tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic -@@ -497,19 +602,19 @@ define i16 @cmpxchg_i16_monotonic_monotonic_reti16(ptr %ptr, i16 %cmp, i16 %val) - ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 --; LA64-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a5, $a3, 0 - ; LA64-NEXT: and $a6, $a5, $a4 --; LA64-NEXT: bne $a6, $a1, .LBB17_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 -+; LA64-NEXT: bne $a6, $a1, .LBB21_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 - ; LA64-NEXT: andn $a6, $a5, $a4 - ; LA64-NEXT: or $a6, $a6, $a2 - ; LA64-NEXT: sc.w $a6, $a3, 0 --; LA64-NEXT: beqz $a6, .LBB17_1 --; LA64-NEXT: b .LBB17_4 --; LA64-NEXT: .LBB17_3: -+; LA64-NEXT: beqz $a6, .LBB21_1 -+; LA64-NEXT: b .LBB21_4 -+; LA64-NEXT: .LBB21_3: - ; LA64-NEXT: dbar 1792 --; LA64-NEXT: .LBB17_4: -+; LA64-NEXT: .LBB21_4: - ; LA64-NEXT: srl.w $a0, $a5, $a0 - ; LA64-NEXT: ret - %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic -@@ -520,17 +625,17 @@ define i16 @cmpxchg_i16_monotonic_monotonic_reti16(ptr %ptr, i16 %cmp, i16 %val) - define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind { - ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti32: - ; LA64: # %bb.0: --; LA64-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 - ; 
LA64-NEXT: ll.w $a3, $a0, 0 --; LA64-NEXT: bne $a3, $a1, .LBB18_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 -+; LA64-NEXT: bne $a3, $a1, .LBB22_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.w $a4, $a0, 0 --; LA64-NEXT: beqz $a4, .LBB18_1 --; LA64-NEXT: b .LBB18_4 --; LA64-NEXT: .LBB18_3: -+; LA64-NEXT: beqz $a4, .LBB22_1 -+; LA64-NEXT: b .LBB22_4 -+; LA64-NEXT: .LBB22_3: - ; LA64-NEXT: dbar 1792 --; LA64-NEXT: .LBB18_4: -+; LA64-NEXT: .LBB22_4: - ; LA64-NEXT: move $a0, $a3 - ; LA64-NEXT: ret - %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic -@@ -541,17 +646,17 @@ define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val) - define i64 @cmpxchg_i64_monotonic_monotonic_reti64(ptr %ptr, i64 %cmp, i64 %val) nounwind { - ; LA64-LABEL: cmpxchg_i64_monotonic_monotonic_reti64: - ; LA64: # %bb.0: --; LA64-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.d $a3, $a0, 0 --; LA64-NEXT: bne $a3, $a1, .LBB19_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 -+; LA64-NEXT: bne $a3, $a1, .LBB23_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.d $a4, $a0, 0 --; LA64-NEXT: beqz $a4, .LBB19_1 --; LA64-NEXT: b .LBB19_4 --; LA64-NEXT: .LBB19_3: -+; LA64-NEXT: beqz $a4, .LBB23_1 -+; LA64-NEXT: b .LBB23_4 -+; LA64-NEXT: .LBB23_3: - ; LA64-NEXT: dbar 1792 --; LA64-NEXT: .LBB19_4: -+; LA64-NEXT: .LBB23_4: - ; LA64-NEXT: move $a0, $a3 - ; LA64-NEXT: ret - %tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic -@@ -574,19 +679,19 @@ define i1 @cmpxchg_i8_monotonic_monotonic_reti1(ptr %ptr, i8 %cmp, i8 %val) noun - ; LA64-NEXT: addi.w $a0, $a0, 0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: addi.w $a2, $a4, 0 --; LA64-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a5, $a3, 0 - ; LA64-NEXT: and $a6, $a5, $a2 --; LA64-NEXT: bne $a6, $a1, .LBB20_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 -+; LA64-NEXT: bne $a6, $a1, .LBB24_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 - ; LA64-NEXT: andn $a6, $a5, $a2 - ; LA64-NEXT: or $a6, $a6, $a0 - ; LA64-NEXT: sc.w $a6, $a3, 0 --; LA64-NEXT: beqz $a6, .LBB20_1 --; LA64-NEXT: b .LBB20_4 --; LA64-NEXT: .LBB20_3: -+; LA64-NEXT: beqz $a6, .LBB24_1 -+; LA64-NEXT: b .LBB24_4 -+; LA64-NEXT: .LBB24_3: - ; LA64-NEXT: dbar 1792 --; LA64-NEXT: .LBB20_4: -+; LA64-NEXT: .LBB24_4: - ; LA64-NEXT: and $a0, $a5, $a4 - ; LA64-NEXT: addi.w $a0, $a0, 0 - ; LA64-NEXT: xor $a0, $a1, $a0 -@@ -613,19 +718,19 @@ define i1 @cmpxchg_i16_monotonic_monotonic_reti1(ptr %ptr, i16 %cmp, i16 %val) n - ; LA64-NEXT: addi.w $a0, $a0, 0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: addi.w $a2, $a4, 0 --; LA64-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a5, $a3, 0 - ; LA64-NEXT: and $a6, $a5, $a2 --; LA64-NEXT: bne $a6, $a1, .LBB21_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 -+; LA64-NEXT: bne $a6, $a1, .LBB25_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB25_1 Depth=1 - ; LA64-NEXT: andn $a6, $a5, $a2 - ; LA64-NEXT: or $a6, $a6, $a0 - ; LA64-NEXT: sc.w $a6, $a3, 0 --; LA64-NEXT: beqz $a6, .LBB21_1 --; LA64-NEXT: b .LBB21_4 --; LA64-NEXT: .LBB21_3: -+; LA64-NEXT: beqz $a6, .LBB25_1 -+; LA64-NEXT: b .LBB25_4 -+; LA64-NEXT: .LBB25_3: - 
; LA64-NEXT: dbar 1792 --; LA64-NEXT: .LBB21_4: -+; LA64-NEXT: .LBB25_4: - ; LA64-NEXT: and $a0, $a5, $a4 - ; LA64-NEXT: addi.w $a0, $a0, 0 - ; LA64-NEXT: xor $a0, $a1, $a0 -@@ -639,17 +744,17 @@ define i1 @cmpxchg_i16_monotonic_monotonic_reti1(ptr %ptr, i16 %cmp, i16 %val) n - define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind { - ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti1: - ; LA64: # %bb.0: --; LA64-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a3, $a0, 0 --; LA64-NEXT: bne $a3, $a1, .LBB22_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 -+; LA64-NEXT: bne $a3, $a1, .LBB26_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB26_1 Depth=1 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.w $a4, $a0, 0 --; LA64-NEXT: beqz $a4, .LBB22_1 --; LA64-NEXT: b .LBB22_4 --; LA64-NEXT: .LBB22_3: -+; LA64-NEXT: beqz $a4, .LBB26_1 -+; LA64-NEXT: b .LBB26_4 -+; LA64-NEXT: .LBB26_3: - ; LA64-NEXT: dbar 1792 --; LA64-NEXT: .LBB22_4: -+; LA64-NEXT: .LBB26_4: - ; LA64-NEXT: addi.w $a0, $a1, 0 - ; LA64-NEXT: xor $a0, $a3, $a0 - ; LA64-NEXT: sltui $a0, $a0, 1 -@@ -662,17 +767,17 @@ define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) n - define i1 @cmpxchg_i64_monotonic_monotonic_reti1(ptr %ptr, i64 %cmp, i64 %val) nounwind { - ; LA64-LABEL: cmpxchg_i64_monotonic_monotonic_reti1: - ; LA64: # %bb.0: --; LA64-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.d $a3, $a0, 0 --; LA64-NEXT: bne $a3, $a1, .LBB23_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 -+; LA64-NEXT: bne $a3, $a1, .LBB27_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.d $a4, $a0, 0 --; LA64-NEXT: beqz $a4, .LBB23_1 --; LA64-NEXT: b .LBB23_4 --; LA64-NEXT: .LBB23_3: -+; LA64-NEXT: beqz $a4, .LBB27_1 -+; LA64-NEXT: b .LBB27_4 -+; LA64-NEXT: .LBB27_3: - ; LA64-NEXT: dbar 1792 --; LA64-NEXT: .LBB23_4: -+; LA64-NEXT: .LBB27_4: - ; LA64-NEXT: xor $a0, $a3, $a1 - ; LA64-NEXT: sltui $a0, $a0, 1 - ; LA64-NEXT: ret --- -2.20.1 - - -From 331674f3553b747d9869276ae34667dce7099a09 Mon Sep 17 00:00:00 2001 -From: Lu Weining <90239436+SixWeining@users.noreply.github.com> -Date: Thu, 19 Oct 2023 09:21:51 +0800 -Subject: [PATCH 5/7] [LoongArch] Improve codegen for atomic cmpxchg ops - (#69339) - -PR #67391 improved atomic codegen by handling memory ordering specified -by the `cmpxchg` instruction. An acquire barrier needs to be generated -when memory ordering includes an acquire operation. This PR improves the -codegen further by only handling the failure ordering. 
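
As a minimal source-level illustration of the pattern this change targets (the function name and chosen orderings below are illustrative only and are not taken from the patch or its tests): the two-ordering form of compare_exchange_strong lowers to an IR cmpxchg with distinct success and failure orderings, and per the description above only the failure ordering decides whether the LL/SC expansion needs a trailing acquire barrier.

    #include <atomic>
    #include <cstdint>

    // Sketch only: with an acquire success ordering but a relaxed
    // (monotonic) failure ordering, the failure path of the LL/SC
    // expansion described above no longer requires an acquire barrier,
    // which is what the dbar 20 -> dbar 1792 changes in the
    // acquire/monotonic test cases reflect.
    bool try_update_acquire(std::atomic<int32_t> &v,
                            int32_t expected, int32_t desired) {
      return v.compare_exchange_strong(expected, desired,
                                       std::memory_order_acquire,   // success
                                       std::memory_order_relaxed);  // failure
    }
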
- -(cherry picked from commit 78abc45c44cdadf76b30e1f3dc24936bb5627d68) ---- - .../LoongArchExpandAtomicPseudoInsts.cpp | 4 +- - .../LoongArch/LoongArchISelLowering.cpp | 7 ++- - .../Target/LoongArch/LoongArchInstrInfo.td | 55 ++++++++++++++++--- - .../ir-instruction/atomic-cmpxchg.ll | 8 +-- - 4 files changed, 56 insertions(+), 18 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp -index b348cb56c136..18a532b55ee5 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp -@@ -571,11 +571,11 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg( - BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB); - } - -- AtomicOrdering Ordering = -+ AtomicOrdering FailureOrdering = - static_cast(MI.getOperand(IsMasked ? 6 : 5).getImm()); - int hint; - -- switch (Ordering) { -+ switch (FailureOrdering) { - case AtomicOrdering::Acquire: - case AtomicOrdering::AcquireRelease: - case AtomicOrdering::SequentiallyConsistent: -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index 33a3197013cc..99328f09921f 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -4492,8 +4492,9 @@ LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR( - Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( - IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, - Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { -- Value *Ordering = -- Builder.getIntN(Subtarget.getGRLen(), static_cast(Ord)); -+ AtomicOrdering FailOrd = CI->getFailureOrdering(); -+ Value *FailureOrdering = -+ Builder.getIntN(Subtarget.getGRLen(), static_cast(FailOrd)); - - // TODO: Support cmpxchg on LA32. 
- Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64; -@@ -4504,7 +4505,7 @@ Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( - Function *MaskedCmpXchg = - Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys); - Value *Result = Builder.CreateCall( -- MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering}); -+ MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering}); - Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); - return Result; - } -diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -index fcbd314507a5..ab1890556814 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -@@ -1753,7 +1753,7 @@ def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMMinMax; - - class PseudoCmpXchg - : Pseudo<(outs GPR:$res, GPR:$scratch), -- (ins GPR:$addr, GPR:$cmpval, GPR:$newval, grlenimm:$ordering)> { -+ (ins GPR:$addr, GPR:$cmpval, GPR:$newval, grlenimm:$fail_order)> { - let Constraints = "@earlyclobber $res,@earlyclobber $scratch"; - let mayLoad = 1; - let mayStore = 1; -@@ -1767,7 +1767,7 @@ def PseudoCmpXchg64 : PseudoCmpXchg; - def PseudoMaskedCmpXchg32 - : Pseudo<(outs GPR:$res, GPR:$scratch), - (ins GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, -- grlenimm:$ordering)> { -+ grlenimm:$fail_order)> { - let Constraints = "@earlyclobber $res,@earlyclobber $scratch"; - let mayLoad = 1; - let mayStore = 1; -@@ -1785,6 +1785,43 @@ class AtomicPat - : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering), - (AMInst GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering)>; - -+// These atomic cmpxchg PatFrags only care about the failure ordering. -+// The PatFrags defined by multiclass `ternary_atomic_op_ord` in -+// TargetSelectionDAG.td care about the merged memory ordering that is the -+// stronger one between success and failure. But for LoongArch LL-SC we only -+// need to care about the failure ordering as explained in PR #67391. So we -+// define these PatFrags that will be used to define cmpxchg pats below. 
-+multiclass ternary_atomic_op_failure_ord { -+ def NAME#_failure_monotonic : PatFrag<(ops node:$ptr, node:$cmp, node:$val), -+ (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ -+ AtomicOrdering Ordering = cast(N)->getFailureOrdering(); -+ return Ordering == AtomicOrdering::Monotonic; -+ }]>; -+ def NAME#_failure_acquire : PatFrag<(ops node:$ptr, node:$cmp, node:$val), -+ (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ -+ AtomicOrdering Ordering = cast(N)->getFailureOrdering(); -+ return Ordering == AtomicOrdering::Acquire; -+ }]>; -+ def NAME#_failure_release : PatFrag<(ops node:$ptr, node:$cmp, node:$val), -+ (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ -+ AtomicOrdering Ordering = cast(N)->getFailureOrdering(); -+ return Ordering == AtomicOrdering::Release; -+ }]>; -+ def NAME#_failure_acq_rel : PatFrag<(ops node:$ptr, node:$cmp, node:$val), -+ (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ -+ AtomicOrdering Ordering = cast(N)->getFailureOrdering(); -+ return Ordering == AtomicOrdering::AcquireRelease; -+ }]>; -+ def NAME#_failure_seq_cst : PatFrag<(ops node:$ptr, node:$cmp, node:$val), -+ (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ -+ AtomicOrdering Ordering = cast(N)->getFailureOrdering(); -+ return Ordering == AtomicOrdering::SequentiallyConsistent; -+ }]>; -+} -+ -+defm atomic_cmp_swap_32 : ternary_atomic_op_failure_ord; -+defm atomic_cmp_swap_64 : ternary_atomic_op_failure_ord; -+ - let Predicates = [IsLA64] in { - def : AtomicPat; -@@ -1847,24 +1884,24 @@ def : AtomicPat { -- def : Pat<(vt (!cast(Op#"_monotonic") GPR:$addr, GPR:$cmp, GPR:$new)), -+ def : Pat<(vt (!cast(Op#"_failure_monotonic") GPR:$addr, GPR:$cmp, GPR:$new)), - (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 2)>; -- def : Pat<(vt (!cast(Op#"_acquire") GPR:$addr, GPR:$cmp, GPR:$new)), -+ def : Pat<(vt (!cast(Op#"_failure_acquire") GPR:$addr, GPR:$cmp, GPR:$new)), - (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 4)>; -- def : Pat<(vt (!cast(Op#"_release") GPR:$addr, GPR:$cmp, GPR:$new)), -+ def : Pat<(vt (!cast(Op#"_failure_release") GPR:$addr, GPR:$cmp, GPR:$new)), - (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 5)>; -- def : Pat<(vt (!cast(Op#"_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new)), -+ def : Pat<(vt (!cast(Op#"_failure_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new)), - (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 6)>; -- def : Pat<(vt (!cast(Op#"_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new)), -+ def : Pat<(vt (!cast(Op#"_failure_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new)), - (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>; - } - - defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32>; - defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64, i64>; - def : Pat<(int_loongarch_masked_cmpxchg_i64 -- GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering), -+ GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$fail_order), - (PseudoMaskedCmpXchg32 -- GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>; -+ GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$fail_order)>; - - def : PseudoMaskedAMMinMaxPat; -diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll -index 174bb9d0ff7d..1dd3f39852d8 100644 ---- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll -+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll -@@ -132,7 +132,7 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { - ; LA64-NEXT: beqz $a5, .LBB4_1 - ; LA64-NEXT: b .LBB4_4 - ; 
LA64-NEXT: .LBB4_3: --; LA64-NEXT: dbar 20 -+; LA64-NEXT: dbar 1792 - ; LA64-NEXT: .LBB4_4: - ; LA64-NEXT: ret - %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire monotonic -@@ -166,7 +166,7 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin - ; LA64-NEXT: beqz $a5, .LBB5_1 - ; LA64-NEXT: b .LBB5_4 - ; LA64-NEXT: .LBB5_3: --; LA64-NEXT: dbar 20 -+; LA64-NEXT: dbar 1792 - ; LA64-NEXT: .LBB5_4: - ; LA64-NEXT: ret - %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire monotonic -@@ -185,7 +185,7 @@ define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin - ; LA64-NEXT: beqz $a4, .LBB6_1 - ; LA64-NEXT: b .LBB6_4 - ; LA64-NEXT: .LBB6_3: --; LA64-NEXT: dbar 20 -+; LA64-NEXT: dbar 1792 - ; LA64-NEXT: .LBB6_4: - ; LA64-NEXT: ret - %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire monotonic -@@ -204,7 +204,7 @@ define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin - ; LA64-NEXT: beqz $a4, .LBB7_1 - ; LA64-NEXT: b .LBB7_4 - ; LA64-NEXT: .LBB7_3: --; LA64-NEXT: dbar 20 -+; LA64-NEXT: dbar 1792 - ; LA64-NEXT: .LBB7_4: - ; LA64-NEXT: ret - %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire monotonic --- -2.20.1 - - -From d8479f9765b12a84d1756aedebf631fdbe4f0451 Mon Sep 17 00:00:00 2001 -From: Lu Weining -Date: Mon, 4 Mar 2024 08:38:52 +0800 -Subject: [PATCH 6/7] [LoongArch] Override - LoongArchTargetLowering::getExtendForAtomicCmpSwapArg (#83656) - -This patch aims to solve Firefox issue: -https://bugzilla.mozilla.org/show_bug.cgi?id=1882301 - -Similar to 616289ed2922. Currently LoongArch uses an ll.[wd]/sc.[wd] -loop for ATOMIC_CMP_XCHG. Because the comparison in the loop is -full-width (i.e. the `bne` instruction), we must sign extend the input -comparsion argument. - -Note that LoongArch ISA manual V1.1 has introduced compare-and-swap -instructions. We would change the implementation (return `ANY_EXTEND`) -when we support them. 
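
A hedged sketch of the kind of 32-bit cmpxchg this affects (the values and names are illustrative and not taken from the referenced bug report): because ll.w sign-extends the loaded word to 64 bits, the full-width bne in the expansion only matches when the comparison argument is sign-extended as well, which is what returning SIGN_EXTEND here guarantees.

    #include <atomic>
    #include <cassert>
    #include <cstdint>

    int main() {
      // Illustrative values: bit 31 is set, so ll.w loads the word as a
      // negative (sign-extended) 64-bit value. If the expected value were
      // not sign-extended too, the full-width bne could spuriously report
      // a mismatch even though the 32-bit values are equal.
      std::atomic<int32_t> v{INT32_MIN + 1};
      int32_t expected = INT32_MIN + 1;
      bool ok = v.compare_exchange_strong(expected, 0);
      assert(ok && "a matching 32-bit cmpxchg must succeed");
      return 0;
    }
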
- -(cherry picked from commit 5f058aa211995d2f0df2a0e063532832569cb7a8) -(cherry picked from commit ea6c457b8dd2d0e6a7f05b4a5bdd2686085e1ec0) ---- - .../LoongArch/LoongArchISelLowering.cpp | 5 + - .../Target/LoongArch/LoongArchISelLowering.h | 2 + - .../LoongArch/atomicrmw-uinc-udec-wrap.ll | 120 +++++++------ - .../ir-instruction/atomic-cmpxchg.ll | 25 +-- - .../LoongArch/ir-instruction/atomicrmw-fp.ll | 160 +++++++++--------- - 5 files changed, 159 insertions(+), 153 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index 99328f09921f..4fc2b4709840 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -4893,3 +4893,8 @@ bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const { - - return !isa(Y); - } ++define i64 @load_seq_cst_i64(ptr %ptr) { ++; LA32-LABEL: load_seq_cst_i64: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: .cfi_def_cfa_offset 16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: .cfi_offset 1, -4 ++; LA32-NEXT: ori $a1, $zero, 5 ++; LA32-NEXT: bl %plt(__atomic_load_8) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: load_seq_cst_i64: ++; LA64: # %bb.0: ++; LA64-NEXT: ld.d $a0, $a0, 0 ++; LA64-NEXT: dbar 0 ++; LA64-NEXT: ret ++ %val = load atomic i64, ptr %ptr seq_cst, align 8 ++ ret i64 %val ++} + -+ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const { -+ // TODO: LAMCAS will use amcas{_DB,}.[bhwd] which does not require extension. -+ return ISD::SIGN_EXTEND; -+} -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -index 23b90640a690..2c9826a13237 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -@@ -203,6 +203,8 @@ public: - return ISD::SIGN_EXTEND; - } - -+ ISD::NodeType getExtendForAtomicCmpSwapArg() const override; -+ - Register getRegisterByName(const char *RegName, LLT VT, - const MachineFunction &MF) const override; - bool mayBeEmittedAsTailCall(const CallInst *CI) const override; -diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll -index d8908acbc945..f0baf19bcf0e 100644 ---- a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll -+++ b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll -@@ -26,15 +26,16 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { - ; LA64-NEXT: andi $a5, $a5, 255 - ; LA64-NEXT: sll.w $a5, $a5, $a0 - ; LA64-NEXT: and $a6, $a3, $a4 --; LA64-NEXT: or $a6, $a6, $a5 -+; LA64-NEXT: or $a5, $a6, $a5 -+; LA64-NEXT: addi.w $a6, $a3, 0 - ; LA64-NEXT: .LBB0_3: # %atomicrmw.start - ; LA64-NEXT: # Parent Loop BB0_1 Depth=1 - ; LA64-NEXT: # => This Inner Loop Header: Depth=2 --; LA64-NEXT: ll.w $a5, $a2, 0 --; LA64-NEXT: bne $a5, $a3, .LBB0_5 -+; LA64-NEXT: ll.w $a3, $a2, 0 -+; LA64-NEXT: bne $a3, $a6, .LBB0_5 - ; LA64-NEXT: # %bb.4: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB0_3 Depth=2 --; LA64-NEXT: move $a7, $a6 -+; LA64-NEXT: move $a7, $a5 - ; LA64-NEXT: sc.w $a7, $a2, 0 - ; LA64-NEXT: beqz $a7, .LBB0_3 - ; LA64-NEXT: b .LBB0_6 -@@ -43,11 +44,9 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { - ; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB0_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB0_1 
Depth=1 --; LA64-NEXT: addi.w $a6, $a3, 0 --; LA64-NEXT: move $a3, $a5 --; LA64-NEXT: bne $a5, $a6, .LBB0_1 -+; LA64-NEXT: bne $a3, $a6, .LBB0_1 - ; LA64-NEXT: # %bb.2: # %atomicrmw.end --; LA64-NEXT: srl.w $a0, $a5, $a0 -+; LA64-NEXT: srl.w $a0, $a3, $a0 - ; LA64-NEXT: ret - %result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst - ret i8 %result -@@ -79,15 +78,16 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { - ; LA64-NEXT: bstrpick.d $a5, $a5, 15, 0 - ; LA64-NEXT: sll.w $a5, $a5, $a0 - ; LA64-NEXT: and $a6, $a3, $a4 --; LA64-NEXT: or $a6, $a6, $a5 -+; LA64-NEXT: or $a5, $a6, $a5 -+; LA64-NEXT: addi.w $a6, $a3, 0 - ; LA64-NEXT: .LBB1_3: # %atomicrmw.start - ; LA64-NEXT: # Parent Loop BB1_1 Depth=1 - ; LA64-NEXT: # => This Inner Loop Header: Depth=2 --; LA64-NEXT: ll.w $a5, $a2, 0 --; LA64-NEXT: bne $a5, $a3, .LBB1_5 -+; LA64-NEXT: ll.w $a3, $a2, 0 -+; LA64-NEXT: bne $a3, $a6, .LBB1_5 - ; LA64-NEXT: # %bb.4: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB1_3 Depth=2 --; LA64-NEXT: move $a7, $a6 -+; LA64-NEXT: move $a7, $a5 - ; LA64-NEXT: sc.w $a7, $a2, 0 - ; LA64-NEXT: beqz $a7, .LBB1_3 - ; LA64-NEXT: b .LBB1_6 -@@ -96,11 +96,9 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { - ; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB1_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1 --; LA64-NEXT: addi.w $a6, $a3, 0 --; LA64-NEXT: move $a3, $a5 --; LA64-NEXT: bne $a5, $a6, .LBB1_1 -+; LA64-NEXT: bne $a3, $a6, .LBB1_1 - ; LA64-NEXT: # %bb.2: # %atomicrmw.end --; LA64-NEXT: srl.w $a0, $a5, $a0 -+; LA64-NEXT: srl.w $a0, $a3, $a0 - ; LA64-NEXT: ret - %result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst - ret i16 %result -@@ -109,37 +107,36 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { - define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { - ; LA64-LABEL: atomicrmw_uinc_wrap_i32: - ; LA64: # %bb.0: --; LA64-NEXT: ld.w $a3, $a0, 0 --; LA64-NEXT: addi.w $a2, $a1, 0 -+; LA64-NEXT: ld.w $a2, $a0, 0 -+; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .p2align 4, , 16 - ; LA64-NEXT: .LBB2_1: # %atomicrmw.start - ; LA64-NEXT: # =>This Loop Header: Depth=1 - ; LA64-NEXT: # Child Loop BB2_3 Depth 2 --; LA64-NEXT: addi.w $a4, $a3, 0 --; LA64-NEXT: sltu $a1, $a4, $a2 --; LA64-NEXT: xori $a1, $a1, 1 --; LA64-NEXT: addi.d $a5, $a3, 1 --; LA64-NEXT: masknez $a5, $a5, $a1 -+; LA64-NEXT: addi.w $a3, $a2, 0 -+; LA64-NEXT: sltu $a4, $a3, $a1 -+; LA64-NEXT: xori $a4, $a4, 1 -+; LA64-NEXT: addi.d $a2, $a2, 1 -+; LA64-NEXT: masknez $a4, $a2, $a4 - ; LA64-NEXT: .LBB2_3: # %atomicrmw.start - ; LA64-NEXT: # Parent Loop BB2_1 Depth=1 - ; LA64-NEXT: # => This Inner Loop Header: Depth=2 --; LA64-NEXT: ll.w $a1, $a0, 0 --; LA64-NEXT: bne $a1, $a3, .LBB2_5 -+; LA64-NEXT: ll.w $a2, $a0, 0 -+; LA64-NEXT: bne $a2, $a3, .LBB2_5 - ; LA64-NEXT: # %bb.4: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB2_3 Depth=2 --; LA64-NEXT: move $a6, $a5 --; LA64-NEXT: sc.w $a6, $a0, 0 --; LA64-NEXT: beqz $a6, .LBB2_3 -+; LA64-NEXT: move $a5, $a4 -+; LA64-NEXT: sc.w $a5, $a0, 0 -+; LA64-NEXT: beqz $a5, .LBB2_3 - ; LA64-NEXT: b .LBB2_6 - ; LA64-NEXT: .LBB2_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1 - ; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB2_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1 --; LA64-NEXT: move $a3, $a1 --; LA64-NEXT: bne $a1, $a4, .LBB2_1 -+; LA64-NEXT: bne $a2, $a3, .LBB2_1 - ; LA64-NEXT: # %bb.2: # %atomicrmw.end --; LA64-NEXT: move $a0, $a1 -+; LA64-NEXT: move $a0, $a2 - ; LA64-NEXT: ret - %result = atomicrmw 
uinc_wrap ptr %ptr, i32 %val seq_cst - ret i32 %result -@@ -212,15 +209,16 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { - ; LA64-NEXT: andi $a6, $a6, 255 - ; LA64-NEXT: sll.w $a6, $a6, $a0 - ; LA64-NEXT: and $a7, $a3, $a4 --; LA64-NEXT: or $a7, $a7, $a6 -+; LA64-NEXT: or $a6, $a7, $a6 -+; LA64-NEXT: addi.w $a7, $a3, 0 - ; LA64-NEXT: .LBB4_3: # %atomicrmw.start - ; LA64-NEXT: # Parent Loop BB4_1 Depth=1 - ; LA64-NEXT: # => This Inner Loop Header: Depth=2 --; LA64-NEXT: ll.w $a6, $a2, 0 --; LA64-NEXT: bne $a6, $a3, .LBB4_5 -+; LA64-NEXT: ll.w $a3, $a2, 0 -+; LA64-NEXT: bne $a3, $a7, .LBB4_5 - ; LA64-NEXT: # %bb.4: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB4_3 Depth=2 --; LA64-NEXT: move $t0, $a7 -+; LA64-NEXT: move $t0, $a6 - ; LA64-NEXT: sc.w $t0, $a2, 0 - ; LA64-NEXT: beqz $t0, .LBB4_3 - ; LA64-NEXT: b .LBB4_6 -@@ -229,11 +227,9 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { - ; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB4_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1 --; LA64-NEXT: addi.w $a7, $a3, 0 --; LA64-NEXT: move $a3, $a6 --; LA64-NEXT: bne $a6, $a7, .LBB4_1 -+; LA64-NEXT: bne $a3, $a7, .LBB4_1 - ; LA64-NEXT: # %bb.2: # %atomicrmw.end --; LA64-NEXT: srl.w $a0, $a6, $a0 -+; LA64-NEXT: srl.w $a0, $a3, $a0 - ; LA64-NEXT: ret - %result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst - ret i8 %result -@@ -270,15 +266,16 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { - ; LA64-NEXT: bstrpick.d $a6, $a6, 15, 0 - ; LA64-NEXT: sll.w $a6, $a6, $a0 - ; LA64-NEXT: and $a7, $a3, $a4 --; LA64-NEXT: or $a7, $a7, $a6 -+; LA64-NEXT: or $a6, $a7, $a6 -+; LA64-NEXT: addi.w $a7, $a3, 0 - ; LA64-NEXT: .LBB5_3: # %atomicrmw.start - ; LA64-NEXT: # Parent Loop BB5_1 Depth=1 - ; LA64-NEXT: # => This Inner Loop Header: Depth=2 --; LA64-NEXT: ll.w $a6, $a2, 0 --; LA64-NEXT: bne $a6, $a3, .LBB5_5 -+; LA64-NEXT: ll.w $a3, $a2, 0 -+; LA64-NEXT: bne $a3, $a7, .LBB5_5 - ; LA64-NEXT: # %bb.4: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB5_3 Depth=2 --; LA64-NEXT: move $t0, $a7 -+; LA64-NEXT: move $t0, $a6 - ; LA64-NEXT: sc.w $t0, $a2, 0 - ; LA64-NEXT: beqz $t0, .LBB5_3 - ; LA64-NEXT: b .LBB5_6 -@@ -287,11 +284,9 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { - ; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB5_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1 --; LA64-NEXT: addi.w $a7, $a3, 0 --; LA64-NEXT: move $a3, $a6 --; LA64-NEXT: bne $a6, $a7, .LBB5_1 -+; LA64-NEXT: bne $a3, $a7, .LBB5_1 - ; LA64-NEXT: # %bb.2: # %atomicrmw.end --; LA64-NEXT: srl.w $a0, $a6, $a0 -+; LA64-NEXT: srl.w $a0, $a3, $a0 - ; LA64-NEXT: ret - %result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst - ret i16 %result -@@ -300,22 +295,22 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { - define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { - ; LA64-LABEL: atomicrmw_udec_wrap_i32: - ; LA64: # %bb.0: --; LA64-NEXT: ld.w $a4, $a0, 0 -+; LA64-NEXT: ld.w $a2, $a0, 0 - ; LA64-NEXT: addi.w $a3, $a1, 0 - ; LA64-NEXT: .p2align 4, , 16 - ; LA64-NEXT: .LBB6_1: # %atomicrmw.start - ; LA64-NEXT: # =>This Loop Header: Depth=1 - ; LA64-NEXT: # Child Loop BB6_3 Depth 2 --; LA64-NEXT: addi.w $a5, $a4, 0 --; LA64-NEXT: sltu $a2, $a3, $a5 --; LA64-NEXT: addi.d $a6, $a4, -1 --; LA64-NEXT: masknez $a6, $a6, $a2 --; LA64-NEXT: maskeqz $a2, $a1, $a2 --; LA64-NEXT: or $a2, $a2, $a6 --; LA64-NEXT: sltui $a6, $a5, 1 --; LA64-NEXT: masknez $a2, $a2, $a6 --; LA64-NEXT: maskeqz $a6, $a1, $a6 --; LA64-NEXT: or $a6, $a6, $a2 -+; LA64-NEXT: addi.w $a4, 
$a2, 0 -+; LA64-NEXT: sltu $a5, $a3, $a4 -+; LA64-NEXT: addi.d $a2, $a2, -1 -+; LA64-NEXT: masknez $a2, $a2, $a5 -+; LA64-NEXT: maskeqz $a5, $a1, $a5 -+; LA64-NEXT: or $a2, $a5, $a2 -+; LA64-NEXT: sltui $a5, $a4, 1 -+; LA64-NEXT: masknez $a2, $a2, $a5 -+; LA64-NEXT: maskeqz $a5, $a1, $a5 -+; LA64-NEXT: or $a5, $a5, $a2 - ; LA64-NEXT: .LBB6_3: # %atomicrmw.start - ; LA64-NEXT: # Parent Loop BB6_1 Depth=1 - ; LA64-NEXT: # => This Inner Loop Header: Depth=2 -@@ -323,17 +318,16 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { - ; LA64-NEXT: bne $a2, $a4, .LBB6_5 - ; LA64-NEXT: # %bb.4: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB6_3 Depth=2 --; LA64-NEXT: move $a7, $a6 --; LA64-NEXT: sc.w $a7, $a0, 0 --; LA64-NEXT: beqz $a7, .LBB6_3 -+; LA64-NEXT: move $a6, $a5 -+; LA64-NEXT: sc.w $a6, $a0, 0 -+; LA64-NEXT: beqz $a6, .LBB6_3 - ; LA64-NEXT: b .LBB6_6 - ; LA64-NEXT: .LBB6_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 - ; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB6_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 --; LA64-NEXT: move $a4, $a2 --; LA64-NEXT: bne $a2, $a5, .LBB6_1 -+; LA64-NEXT: bne $a2, $a4, .LBB6_1 - ; LA64-NEXT: # %bb.2: # %atomicrmw.end - ; LA64-NEXT: move $a0, $a2 - ; LA64-NEXT: ret -diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll -index 1dd3f39852d8..ebb09640e6c9 100644 ---- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll -+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll -@@ -71,6 +71,7 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind - define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind { - ; LA64-LABEL: cmpxchg_i32_acquire_acquire: - ; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a3, $a0, 0 - ; LA64-NEXT: bne $a3, $a1, .LBB2_3 -@@ -176,6 +177,7 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin - define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { - ; LA64-LABEL: cmpxchg_i32_acquire_monotonic: - ; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a3, $a0, 0 - ; LA64-NEXT: bne $a3, $a1, .LBB6_3 -@@ -285,9 +287,10 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou - define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind { - ; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti32: - ; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a3, $a1, 0 - ; LA64-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: ll.w $a3, $a0, 0 --; LA64-NEXT: bne $a3, $a1, .LBB10_3 -+; LA64-NEXT: ll.w $a1, $a0, 0 -+; LA64-NEXT: bne $a1, $a3, .LBB10_3 - ; LA64-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.w $a4, $a0, 0 -@@ -296,7 +299,7 @@ define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nou - ; LA64-NEXT: .LBB10_3: - ; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB10_4: --; LA64-NEXT: move $a0, $a3 -+; LA64-NEXT: move $a0, $a1 - ; LA64-NEXT: ret - %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire - %res = extractvalue { i32, i1 } %tmp, 0 -@@ -404,6 +407,7 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw - define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 
%cmp, i32 %val) nounwind { - ; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti1: - ; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a3, $a0, 0 - ; LA64-NEXT: bne $a3, $a1, .LBB14_3 -@@ -415,8 +419,7 @@ define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounw - ; LA64-NEXT: .LBB14_3: - ; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB14_4: --; LA64-NEXT: addi.w $a0, $a1, 0 --; LA64-NEXT: xor $a0, $a3, $a0 -+; LA64-NEXT: xor $a0, $a3, $a1 - ; LA64-NEXT: sltui $a0, $a0, 1 - ; LA64-NEXT: ret - %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire -@@ -516,6 +519,7 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw - define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { - ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic: - ; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a3, $a0, 0 - ; LA64-NEXT: bne $a3, $a1, .LBB18_3 -@@ -625,9 +629,10 @@ define i16 @cmpxchg_i16_monotonic_monotonic_reti16(ptr %ptr, i16 %cmp, i16 %val) - define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind { - ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti32: - ; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a3, $a1, 0 - ; LA64-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: ll.w $a3, $a0, 0 --; LA64-NEXT: bne $a3, $a1, .LBB22_3 -+; LA64-NEXT: ll.w $a1, $a0, 0 -+; LA64-NEXT: bne $a1, $a3, .LBB22_3 - ; LA64-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.w $a4, $a0, 0 -@@ -636,7 +641,7 @@ define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val) - ; LA64-NEXT: .LBB22_3: - ; LA64-NEXT: dbar 1792 - ; LA64-NEXT: .LBB22_4: --; LA64-NEXT: move $a0, $a3 -+; LA64-NEXT: move $a0, $a1 - ; LA64-NEXT: ret - %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic - %res = extractvalue { i32, i1 } %tmp, 0 -@@ -744,6 +749,7 @@ define i1 @cmpxchg_i16_monotonic_monotonic_reti1(ptr %ptr, i16 %cmp, i16 %val) n - define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind { - ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti1: - ; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a3, $a0, 0 - ; LA64-NEXT: bne $a3, $a1, .LBB26_3 -@@ -755,8 +761,7 @@ define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) n - ; LA64-NEXT: .LBB26_3: - ; LA64-NEXT: dbar 1792 - ; LA64-NEXT: .LBB26_4: --; LA64-NEXT: addi.w $a0, $a1, 0 --; LA64-NEXT: xor $a0, $a3, $a0 -+; LA64-NEXT: xor $a0, $a3, $a1 - ; LA64-NEXT: sltui $a0, $a0, 1 - ; LA64-NEXT: ret - %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic -diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll -index 589360823b14..4d8160d70803 100644 ---- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll -+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll -@@ -16,6 +16,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { - ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB0_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB0_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: 
Depth=2 -@@ -33,8 +34,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { - ; LA64F-NEXT: .LBB0_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB0_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB0_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -51,6 +51,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { - ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB0_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB0_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -68,8 +69,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { - ; LA64D-NEXT: .LBB0_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB0_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB0_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fadd ptr %p, float 1.0 acquire, align 4 -@@ -90,6 +90,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { - ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB1_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB1_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -107,8 +108,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { - ; LA64F-NEXT: .LBB1_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB1_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB1_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB1_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -125,6 +125,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { - ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB1_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB1_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -142,8 +143,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { - ; LA64D-NEXT: .LBB1_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB1_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB1_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fsub ptr %p, float 1.0 acquire, align 4 -@@ -165,6 +165,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { - ; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB2_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB2_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -182,8 +183,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { - ; LA64F-NEXT: .LBB2_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB2_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB2_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB2_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -201,6 +201,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { - ; LA64D-NEXT: fmin.s $fa2, 
$fa2, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB2_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB2_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -218,8 +219,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { - ; LA64D-NEXT: .LBB2_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB2_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB2_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB2_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fmin ptr %p, float 1.0 acquire, align 4 -@@ -241,6 +241,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { - ; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB3_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB3_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -258,8 +259,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { - ; LA64F-NEXT: .LBB3_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB3_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB3_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB3_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -277,6 +277,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { - ; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB3_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB3_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -294,8 +295,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { - ; LA64D-NEXT: .LBB3_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB3_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB3_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB3_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fmax ptr %p, float 1.0 acquire, align 4 -@@ -694,6 +694,7 @@ define float @float_fadd_release(ptr %p) nounwind { - ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB8_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB8_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -711,8 +712,7 @@ define float @float_fadd_release(ptr %p) nounwind { - ; LA64F-NEXT: .LBB8_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB8_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB8_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB8_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -729,6 +729,7 @@ define float @float_fadd_release(ptr %p) nounwind { - ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB8_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB8_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -746,8 +747,7 @@ define float @float_fadd_release(ptr %p) nounwind { - ; LA64D-NEXT: .LBB8_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB8_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: 
addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB8_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB8_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fadd ptr %p, float 1.0 release, align 4 -@@ -768,6 +768,7 @@ define float @float_fsub_release(ptr %p) nounwind { - ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB9_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB9_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -785,8 +786,7 @@ define float @float_fsub_release(ptr %p) nounwind { - ; LA64F-NEXT: .LBB9_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB9_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB9_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB9_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -803,6 +803,7 @@ define float @float_fsub_release(ptr %p) nounwind { - ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB9_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB9_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -820,8 +821,7 @@ define float @float_fsub_release(ptr %p) nounwind { - ; LA64D-NEXT: .LBB9_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB9_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB9_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB9_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fsub ptr %p, float 1.0 release, align 4 -@@ -843,6 +843,7 @@ define float @float_fmin_release(ptr %p) nounwind { - ; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB10_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB10_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -860,8 +861,7 @@ define float @float_fmin_release(ptr %p) nounwind { - ; LA64F-NEXT: .LBB10_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB10_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB10_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB10_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -879,6 +879,7 @@ define float @float_fmin_release(ptr %p) nounwind { - ; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB10_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB10_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -896,8 +897,7 @@ define float @float_fmin_release(ptr %p) nounwind { - ; LA64D-NEXT: .LBB10_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB10_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB10_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB10_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fmin ptr %p, float 1.0 release, align 4 -@@ -919,6 +919,7 @@ define float @float_fmax_release(ptr %p) nounwind { - ; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB11_3: # 
%atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB11_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -936,8 +937,7 @@ define float @float_fmax_release(ptr %p) nounwind { - ; LA64F-NEXT: .LBB11_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB11_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB11_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB11_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -955,6 +955,7 @@ define float @float_fmax_release(ptr %p) nounwind { - ; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB11_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB11_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -972,8 +973,7 @@ define float @float_fmax_release(ptr %p) nounwind { - ; LA64D-NEXT: .LBB11_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB11_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB11_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB11_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fmax ptr %p, float 1.0 release, align 4 -@@ -1372,6 +1372,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { - ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB16_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB16_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -1389,8 +1390,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { - ; LA64F-NEXT: .LBB16_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB16_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB16_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB16_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -1407,6 +1407,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { - ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB16_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB16_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -1424,8 +1425,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { - ; LA64D-NEXT: .LBB16_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB16_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB16_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB16_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fadd ptr %p, float 1.0 acq_rel, align 4 -@@ -1446,6 +1446,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { - ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB17_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB17_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -1463,8 +1464,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { - ; LA64F-NEXT: .LBB17_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB17_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB17_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB17_1 - ; 
LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -1481,6 +1481,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { - ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB17_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB17_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -1498,8 +1499,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { - ; LA64D-NEXT: .LBB17_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB17_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB17_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB17_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fsub ptr %p, float 1.0 acq_rel, align 4 -@@ -1521,6 +1521,7 @@ define float @float_fmin_acq_rel(ptr %p) nounwind { - ; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB18_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB18_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -1538,8 +1539,7 @@ define float @float_fmin_acq_rel(ptr %p) nounwind { - ; LA64F-NEXT: .LBB18_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB18_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB18_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB18_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -1557,6 +1557,7 @@ define float @float_fmin_acq_rel(ptr %p) nounwind { - ; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB18_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB18_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -1574,8 +1575,7 @@ define float @float_fmin_acq_rel(ptr %p) nounwind { - ; LA64D-NEXT: .LBB18_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB18_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB18_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB18_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fmin ptr %p, float 1.0 acq_rel, align 4 -@@ -1597,6 +1597,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { - ; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB19_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB19_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -1614,8 +1615,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { - ; LA64F-NEXT: .LBB19_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB19_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB19_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB19_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -1633,6 +1633,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { - ; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB19_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB19_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -1650,8 
+1651,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { - ; LA64D-NEXT: .LBB19_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB19_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB19_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB19_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fmax ptr %p, float 1.0 acq_rel, align 4 -@@ -2074,6 +2074,7 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { - ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB24_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB24_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -2091,8 +2092,7 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { - ; LA64F-NEXT: .LBB24_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB24_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB24_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB24_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -2109,6 +2109,7 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { - ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB24_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB24_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -2126,8 +2127,7 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { - ; LA64D-NEXT: .LBB24_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB24_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB24_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB24_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fadd ptr %p, float 1.0 seq_cst, align 4 -@@ -2148,6 +2148,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { - ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB25_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB25_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -2165,8 +2166,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { - ; LA64F-NEXT: .LBB25_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB25_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB25_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB25_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -2183,6 +2183,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { - ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB25_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB25_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -2200,8 +2201,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { - ; LA64D-NEXT: .LBB25_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB25_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB25_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB25_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fsub ptr %p, float 1.0 seq_cst, align 4 -@@ -2223,6 
+2223,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { - ; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB26_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB26_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -2240,8 +2241,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { - ; LA64F-NEXT: .LBB26_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB26_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB26_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB26_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -2259,6 +2259,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { - ; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB26_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB26_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -2276,8 +2277,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { - ; LA64D-NEXT: .LBB26_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB26_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB26_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB26_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fmin ptr %p, float 1.0 seq_cst, align 4 -@@ -2299,6 +2299,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { - ; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB27_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB27_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -2316,8 +2317,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { - ; LA64F-NEXT: .LBB27_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB27_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB27_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB27_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -2335,6 +2335,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { - ; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB27_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB27_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -2352,8 +2353,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { - ; LA64D-NEXT: .LBB27_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB27_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB27_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB27_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fmax ptr %p, float 1.0 seq_cst, align 4 -@@ -2752,6 +2752,7 @@ define float @float_fadd_monotonic(ptr %p) nounwind { - ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB32_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB32_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -2769,8 +2770,7 @@ define float @float_fadd_monotonic(ptr %p) nounwind { - ; 
LA64F-NEXT: .LBB32_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB32_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB32_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB32_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -2787,6 +2787,7 @@ define float @float_fadd_monotonic(ptr %p) nounwind { - ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB32_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB32_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -2804,8 +2805,7 @@ define float @float_fadd_monotonic(ptr %p) nounwind { - ; LA64D-NEXT: .LBB32_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB32_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB32_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB32_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fadd ptr %p, float 1.0 monotonic, align 4 -@@ -2826,6 +2826,7 @@ define float @float_fsub_monotonic(ptr %p) nounwind { - ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB33_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB33_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -2843,8 +2844,7 @@ define float @float_fsub_monotonic(ptr %p) nounwind { - ; LA64F-NEXT: .LBB33_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB33_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB33_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB33_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -2861,6 +2861,7 @@ define float @float_fsub_monotonic(ptr %p) nounwind { - ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB33_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB33_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -2878,8 +2879,7 @@ define float @float_fsub_monotonic(ptr %p) nounwind { - ; LA64D-NEXT: .LBB33_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB33_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB33_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB33_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fsub ptr %p, float 1.0 monotonic, align 4 -@@ -2901,6 +2901,7 @@ define float @float_fmin_monotonic(ptr %p) nounwind { - ; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB34_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB34_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -2918,8 +2919,7 @@ define float @float_fmin_monotonic(ptr %p) nounwind { - ; LA64F-NEXT: .LBB34_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB34_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB34_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB34_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -2937,6 +2937,7 @@ define float @float_fmin_monotonic(ptr %p) nounwind { - ; LA64D-NEXT: fmin.s $fa2, $fa2, 
$fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB34_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB34_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -2954,8 +2955,7 @@ define float @float_fmin_monotonic(ptr %p) nounwind { - ; LA64D-NEXT: .LBB34_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB34_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB34_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB34_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fmin ptr %p, float 1.0 monotonic, align 4 -@@ -2977,6 +2977,7 @@ define float @float_fmax_monotonic(ptr %p) nounwind { - ; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB35_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB35_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -2994,8 +2995,7 @@ define float @float_fmax_monotonic(ptr %p) nounwind { - ; LA64F-NEXT: .LBB35_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB35_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB35_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB35_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -3013,6 +3013,7 @@ define float @float_fmax_monotonic(ptr %p) nounwind { - ; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB35_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB35_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -3030,8 +3031,7 @@ define float @float_fmax_monotonic(ptr %p) nounwind { - ; LA64D-NEXT: .LBB35_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB35_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB35_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB35_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fmax ptr %p, float 1.0 monotonic, align 4 --- -2.20.1 - - -From 4713504e1495a3831ba1a47078de83db3a8a533c Mon Sep 17 00:00:00 2001 -From: Lu Weining -Date: Thu, 19 Oct 2023 10:16:13 +0800 -Subject: [PATCH 7/7] [Memory] Call __clear_cache in InvalidateInstructionCache - on LoongArch (#67285) - -As the comments of `InvalidateInstructionCache`: Before the JIT can run -a block of code that has been emitted it must invalidate the instruction -cache on some platforms. I think it applies to LoongArch as LoongArch -has a weak memory-model. But I'm not able to write a test to demonstrate -this issue. Perhaps self-modifing code should be wrote? 
- -(cherry picked from commit fb366581e7d67df7d9a98605fd65a7e7908451e7) ---- - llvm/lib/Support/Unix/Memory.inc | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/llvm/lib/Support/Unix/Memory.inc b/llvm/lib/Support/Unix/Memory.inc -index 4c8f6b2ea7d3..69bd1164343d 100644 ---- a/llvm/lib/Support/Unix/Memory.inc -+++ b/llvm/lib/Support/Unix/Memory.inc -@@ -237,7 +237,8 @@ void Memory::InvalidateInstructionCache(const void *Addr, size_t Len) { - for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize) - asm volatile("icbi 0, %0" : : "r"(Line)); - asm volatile("isync"); --#elif (defined(__arm__) || defined(__aarch64__) || defined(__mips__)) && \ -+#elif (defined(__arm__) || defined(__aarch64__) || defined(__loongarch__) || \ -+ defined(__mips__)) && \ - defined(__GNUC__) - // FIXME: Can we safely always call this for __GNUC__ everywhere? - const char *Start = static_cast(Addr); + define void @store_release_i8(ptr %ptr, i8 signext %v) { + ; LA32-LABEL: store_release_i8: + ; LA32: # %bb.0: -- 2.20.1 diff --git a/0003-LoongArch-Add-LASX-intrinsic-support.patch b/0003-LoongArch-Add-LASX-intrinsic-support.patch new file mode 100644 index 0000000..3244564 --- /dev/null +++ b/0003-LoongArch-Add-LASX-intrinsic-support.patch @@ -0,0 +1,2240 @@ +From 091fc830c87c713f864c4030d3e750d9b8b144c9 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Sat, 19 Aug 2023 16:53:50 +0800 +Subject: [PATCH 03/42] [LoongArch] Add LASX intrinsic support + +This patch is similar to D155829. + +Depends on D155829 + +Reviewed By: SixWeining + +Differential Revision: https://reviews.llvm.org/D155830 + +(cherry picked from commit 691f0d00b84f6ecaf8e341ef38256e939cca6b1e) + +--- + llvm/include/llvm/IR/IntrinsicsLoongArch.td | 523 +++++++++++++ + .../LoongArch/LoongArchISelLowering.cpp | 402 +++++++++- + .../Target/LoongArch/LoongArchInstrInfo.cpp | 12 + + .../LoongArch/LoongArchLASXInstrInfo.td | 702 ++++++++++++++++++ + 4 files changed, 1633 insertions(+), 6 deletions(-) + +diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td +index d39d8261ebe3..685deaec7709 100644 +--- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td ++++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td +@@ -647,3 +647,526 @@ def int_loongarch_lsx_vstelm_d + [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; + + } // TargetPrefix = "loongarch" ++ ++//===----------------------------------------------------------------------===// ++// LASX ++ ++let TargetPrefix = "loongarch" in { ++foreach inst = ["xvadd_b", "xvsub_b", ++ "xvsadd_b", "xvsadd_bu", "xvssub_b", "xvssub_bu", ++ "xvavg_b", "xvavg_bu", "xvavgr_b", "xvavgr_bu", ++ "xvabsd_b", "xvabsd_bu", "xvadda_b", ++ "xvmax_b", "xvmax_bu", "xvmin_b", "xvmin_bu", ++ "xvmul_b", "xvmuh_b", "xvmuh_bu", ++ "xvdiv_b", "xvdiv_bu", "xvmod_b", "xvmod_bu", "xvsigncov_b", ++ "xvand_v", "xvor_v", "xvxor_v", "xvnor_v", "xvandn_v", "xvorn_v", ++ "xvsll_b", "xvsrl_b", "xvsra_b", "xvrotr_b", "xvsrlr_b", "xvsrar_b", ++ "xvbitclr_b", "xvbitset_b", "xvbitrev_b", ++ "xvseq_b", "xvsle_b", "xvsle_bu", "xvslt_b", "xvslt_bu", ++ "xvpackev_b", "xvpackod_b", "xvpickev_b", "xvpickod_b", ++ "xvilvl_b", "xvilvh_b"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v32i8_ty], ++ [llvm_v32i8_ty, llvm_v32i8_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvadd_h", "xvsub_h", ++ "xvsadd_h", "xvsadd_hu", "xvssub_h", "xvssub_hu", ++ "xvavg_h", "xvavg_hu", "xvavgr_h", "xvavgr_hu", ++ "xvabsd_h", "xvabsd_hu", "xvadda_h", ++ "xvmax_h", "xvmax_hu", "xvmin_h", "xvmin_hu", ++ "xvmul_h", 
"xvmuh_h", "xvmuh_hu", ++ "xvdiv_h", "xvdiv_hu", "xvmod_h", "xvmod_hu", "xvsigncov_h", ++ "xvsll_h", "xvsrl_h", "xvsra_h", "xvrotr_h", "xvsrlr_h", "xvsrar_h", ++ "xvbitclr_h", "xvbitset_h", "xvbitrev_h", ++ "xvseq_h", "xvsle_h", "xvsle_hu", "xvslt_h", "xvslt_hu", ++ "xvpackev_h", "xvpackod_h", "xvpickev_h", "xvpickod_h", ++ "xvilvl_h", "xvilvh_h"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], ++ [llvm_v16i16_ty, llvm_v16i16_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvadd_w", "xvsub_w", ++ "xvsadd_w", "xvsadd_wu", "xvssub_w", "xvssub_wu", ++ "xvavg_w", "xvavg_wu", "xvavgr_w", "xvavgr_wu", ++ "xvabsd_w", "xvabsd_wu", "xvadda_w", ++ "xvmax_w", "xvmax_wu", "xvmin_w", "xvmin_wu", ++ "xvmul_w", "xvmuh_w", "xvmuh_wu", ++ "xvdiv_w", "xvdiv_wu", "xvmod_w", "xvmod_wu", "xvsigncov_w", ++ "xvsll_w", "xvsrl_w", "xvsra_w", "xvrotr_w", "xvsrlr_w", "xvsrar_w", ++ "xvbitclr_w", "xvbitset_w", "xvbitrev_w", ++ "xvseq_w", "xvsle_w", "xvsle_wu", "xvslt_w", "xvslt_wu", ++ "xvpackev_w", "xvpackod_w", "xvpickev_w", "xvpickod_w", ++ "xvilvl_w", "xvilvh_w", "xvperm_w"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], ++ [llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvadd_d", "xvadd_q", "xvsub_d", "xvsub_q", ++ "xvsadd_d", "xvsadd_du", "xvssub_d", "xvssub_du", ++ "xvhaddw_q_d", "xvhaddw_qu_du", "xvhsubw_q_d", "xvhsubw_qu_du", ++ "xvaddwev_q_d", "xvaddwod_q_d", "xvsubwev_q_d", "xvsubwod_q_d", ++ "xvaddwev_q_du", "xvaddwod_q_du", "xvsubwev_q_du", "xvsubwod_q_du", ++ "xvaddwev_q_du_d", "xvaddwod_q_du_d", ++ "xvavg_d", "xvavg_du", "xvavgr_d", "xvavgr_du", ++ "xvabsd_d", "xvabsd_du", "xvadda_d", ++ "xvmax_d", "xvmax_du", "xvmin_d", "xvmin_du", ++ "xvmul_d", "xvmuh_d", "xvmuh_du", ++ "xvmulwev_q_d", "xvmulwod_q_d", "xvmulwev_q_du", "xvmulwod_q_du", ++ "xvmulwev_q_du_d", "xvmulwod_q_du_d", ++ "xvdiv_d", "xvdiv_du", "xvmod_d", "xvmod_du", "xvsigncov_d", ++ "xvsll_d", "xvsrl_d", "xvsra_d", "xvrotr_d", "xvsrlr_d", "xvsrar_d", ++ "xvbitclr_d", "xvbitset_d", "xvbitrev_d", ++ "xvseq_d", "xvsle_d", "xvsle_du", "xvslt_d", "xvslt_du", ++ "xvpackev_d", "xvpackod_d", "xvpickev_d", "xvpickod_d", ++ "xvilvl_d", "xvilvh_d"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], ++ [llvm_v4i64_ty, llvm_v4i64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvaddi_bu", "xvsubi_bu", ++ "xvmaxi_b", "xvmaxi_bu", "xvmini_b", "xvmini_bu", ++ "xvsat_b", "xvsat_bu", ++ "xvandi_b", "xvori_b", "xvxori_b", "xvnori_b", ++ "xvslli_b", "xvsrli_b", "xvsrai_b", "xvrotri_b", ++ "xvsrlri_b", "xvsrari_b", ++ "xvbitclri_b", "xvbitseti_b", "xvbitrevi_b", ++ "xvseqi_b", "xvslei_b", "xvslei_bu", "xvslti_b", "xvslti_bu", ++ "xvrepl128vei_b", "xvbsll_v", "xvbsrl_v", "xvshuf4i_b"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v32i8_ty], ++ [llvm_v32i8_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["xvaddi_hu", "xvsubi_hu", ++ "xvmaxi_h", "xvmaxi_hu", "xvmini_h", "xvmini_hu", ++ "xvsat_h", "xvsat_hu", ++ "xvslli_h", "xvsrli_h", "xvsrai_h", "xvrotri_h", ++ "xvsrlri_h", "xvsrari_h", ++ "xvbitclri_h", "xvbitseti_h", "xvbitrevi_h", ++ "xvseqi_h", "xvslei_h", "xvslei_hu", "xvslti_h", "xvslti_hu", ++ "xvrepl128vei_h", "xvshuf4i_h"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], ++ [llvm_v16i16_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["xvaddi_wu", "xvsubi_wu", ++ "xvmaxi_w", "xvmaxi_wu", "xvmini_w", "xvmini_wu", ++ "xvsat_w", "xvsat_wu", ++ "xvslli_w", "xvsrli_w", "xvsrai_w", "xvrotri_w", ++ "xvsrlri_w", "xvsrari_w", ++ "xvbitclri_w", "xvbitseti_w", 
"xvbitrevi_w", ++ "xvseqi_w", "xvslei_w", "xvslei_wu", "xvslti_w", "xvslti_wu", ++ "xvrepl128vei_w", "xvshuf4i_w", "xvpickve_w"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], ++ [llvm_v8i32_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["xvaddi_du", "xvsubi_du", ++ "xvmaxi_d", "xvmaxi_du", "xvmini_d", "xvmini_du", ++ "xvsat_d", "xvsat_du", ++ "xvslli_d", "xvsrli_d", "xvsrai_d", "xvrotri_d", ++ "xvsrlri_d", "xvsrari_d", ++ "xvbitclri_d", "xvbitseti_d", "xvbitrevi_d", ++ "xvseqi_d", "xvslei_d", "xvslei_du", "xvslti_d", "xvslti_du", ++ "xvrepl128vei_d", "xvpermi_d", "xvpickve_d"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], ++ [llvm_v4i64_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++ ++foreach inst = ["xvhaddw_h_b", "xvhaddw_hu_bu", "xvhsubw_h_b", "xvhsubw_hu_bu", ++ "xvaddwev_h_b", "xvaddwod_h_b", "xvsubwev_h_b", "xvsubwod_h_b", ++ "xvaddwev_h_bu", "xvaddwod_h_bu", "xvsubwev_h_bu", "xvsubwod_h_bu", ++ "xvaddwev_h_bu_b", "xvaddwod_h_bu_b", ++ "xvmulwev_h_b", "xvmulwod_h_b", "xvmulwev_h_bu", "xvmulwod_h_bu", ++ "xvmulwev_h_bu_b", "xvmulwod_h_bu_b"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], ++ [llvm_v32i8_ty, llvm_v32i8_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvhaddw_w_h", "xvhaddw_wu_hu", "xvhsubw_w_h", "xvhsubw_wu_hu", ++ "xvaddwev_w_h", "xvaddwod_w_h", "xvsubwev_w_h", "xvsubwod_w_h", ++ "xvaddwev_w_hu", "xvaddwod_w_hu", "xvsubwev_w_hu", "xvsubwod_w_hu", ++ "xvaddwev_w_hu_h", "xvaddwod_w_hu_h", ++ "xvmulwev_w_h", "xvmulwod_w_h", "xvmulwev_w_hu", "xvmulwod_w_hu", ++ "xvmulwev_w_hu_h", "xvmulwod_w_hu_h"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], ++ [llvm_v16i16_ty, llvm_v16i16_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvhaddw_d_w", "xvhaddw_du_wu", "xvhsubw_d_w", "xvhsubw_du_wu", ++ "xvaddwev_d_w", "xvaddwod_d_w", "xvsubwev_d_w", "xvsubwod_d_w", ++ "xvaddwev_d_wu", "xvaddwod_d_wu", "xvsubwev_d_wu", "xvsubwod_d_wu", ++ "xvaddwev_d_wu_w", "xvaddwod_d_wu_w", ++ "xvmulwev_d_w", "xvmulwod_d_w", "xvmulwev_d_wu", "xvmulwod_d_wu", ++ "xvmulwev_d_wu_w", "xvmulwod_d_wu_w"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], ++ [llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvsrln_b_h", "xvsran_b_h", "xvsrlrn_b_h", "xvsrarn_b_h", ++ "xvssrln_b_h", "xvssran_b_h", "xvssrln_bu_h", "xvssran_bu_h", ++ "xvssrlrn_b_h", "xvssrarn_b_h", "xvssrlrn_bu_h", "xvssrarn_bu_h"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v32i8_ty], ++ [llvm_v16i16_ty, llvm_v16i16_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvsrln_h_w", "xvsran_h_w", "xvsrlrn_h_w", "xvsrarn_h_w", ++ "xvssrln_h_w", "xvssran_h_w", "xvssrln_hu_w", "xvssran_hu_w", ++ "xvssrlrn_h_w", "xvssrarn_h_w", "xvssrlrn_hu_w", "xvssrarn_hu_w"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], ++ [llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvsrln_w_d", "xvsran_w_d", "xvsrlrn_w_d", "xvsrarn_w_d", ++ "xvssrln_w_d", "xvssran_w_d", "xvssrln_wu_d", "xvssran_wu_d", ++ "xvssrlrn_w_d", "xvssrarn_w_d", "xvssrlrn_wu_d", "xvssrarn_wu_d"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], ++ [llvm_v4i64_ty, llvm_v4i64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvmadd_b", "xvmsub_b", "xvfrstp_b", "xvbitsel_v", "xvshuf_b"] in ++ def int_loongarch_lasx_#inst ++ : VecInt<[llvm_v32i8_ty], ++ [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvmadd_h", "xvmsub_h", "xvfrstp_h", "xvshuf_h"] in ++ def int_loongarch_lasx_#inst ++ : VecInt<[llvm_v16i16_ty], ++ [llvm_v16i16_ty, 
llvm_v16i16_ty, llvm_v16i16_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvmadd_w", "xvmsub_w", "xvshuf_w"] in ++ def int_loongarch_lasx_#inst ++ : VecInt<[llvm_v8i32_ty], ++ [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvmadd_d", "xvmsub_d", "xvshuf_d"] in ++ def int_loongarch_lasx_#inst ++ : VecInt<[llvm_v4i64_ty], ++ [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvsrlni_b_h", "xvsrani_b_h", "xvsrlrni_b_h", "xvsrarni_b_h", ++ "xvssrlni_b_h", "xvssrani_b_h", "xvssrlni_bu_h", "xvssrani_bu_h", ++ "xvssrlrni_b_h", "xvssrarni_b_h", "xvssrlrni_bu_h", "xvssrarni_bu_h", ++ "xvfrstpi_b", "xvbitseli_b", "xvextrins_b", "xvpermi_q"] in ++ def int_loongarch_lasx_#inst ++ : VecInt<[llvm_v32i8_ty], ++ [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["xvsrlni_h_w", "xvsrani_h_w", "xvsrlrni_h_w", "xvsrarni_h_w", ++ "xvssrlni_h_w", "xvssrani_h_w", "xvssrlni_hu_w", "xvssrani_hu_w", ++ "xvssrlrni_h_w", "xvssrarni_h_w", "xvssrlrni_hu_w", "xvssrarni_hu_w", ++ "xvfrstpi_h", "xvextrins_h"] in ++ def int_loongarch_lasx_#inst ++ : VecInt<[llvm_v16i16_ty], ++ [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["xvsrlni_w_d", "xvsrani_w_d", "xvsrlrni_w_d", "xvsrarni_w_d", ++ "xvssrlni_w_d", "xvssrani_w_d", "xvssrlni_wu_d", "xvssrani_wu_d", ++ "xvssrlrni_w_d", "xvssrarni_w_d", "xvssrlrni_wu_d", "xvssrarni_wu_d", ++ "xvpermi_w", "xvextrins_w", "xvinsve0_w"] in ++ def int_loongarch_lasx_#inst ++ : VecInt<[llvm_v8i32_ty], ++ [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["xvsrlni_d_q", "xvsrani_d_q", "xvsrlrni_d_q", "xvsrarni_d_q", ++ "xvssrlni_d_q", "xvssrani_d_q", "xvssrlni_du_q", "xvssrani_du_q", ++ "xvssrlrni_d_q", "xvssrarni_d_q", "xvssrlrni_du_q", "xvssrarni_du_q", ++ "xvshuf4i_d", "xvextrins_d", "xvinsve0_d"] in ++ def int_loongarch_lasx_#inst ++ : VecInt<[llvm_v4i64_ty], ++ [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++ ++foreach inst = ["xvmaddwev_h_b", "xvmaddwod_h_b", "xvmaddwev_h_bu", ++ "xvmaddwod_h_bu", "xvmaddwev_h_bu_b", "xvmaddwod_h_bu_b"] in ++ def int_loongarch_lasx_#inst ++ : VecInt<[llvm_v16i16_ty], ++ [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvmaddwev_w_h", "xvmaddwod_w_h", "xvmaddwev_w_hu", ++ "xvmaddwod_w_hu", "xvmaddwev_w_hu_h", "xvmaddwod_w_hu_h"] in ++ def int_loongarch_lasx_#inst ++ : VecInt<[llvm_v8i32_ty], ++ [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvmaddwev_d_w", "xvmaddwod_d_w", "xvmaddwev_d_wu", ++ "xvmaddwod_d_wu", "xvmaddwev_d_wu_w", "xvmaddwod_d_wu_w"] in ++ def int_loongarch_lasx_#inst ++ : VecInt<[llvm_v4i64_ty], ++ [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvmaddwev_q_d", "xvmaddwod_q_d", "xvmaddwev_q_du", ++ "xvmaddwod_q_du", "xvmaddwev_q_du_d", "xvmaddwod_q_du_d"] in ++ def int_loongarch_lasx_#inst ++ : VecInt<[llvm_v4i64_ty], ++ [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvsllwil_h_b", "xvsllwil_hu_bu"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], ++ [llvm_v32i8_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["xvsllwil_w_h", "xvsllwil_wu_hu"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], ++ [llvm_v16i16_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["xvsllwil_d_w", "xvsllwil_du_wu"] in ++ def int_loongarch_lasx_#inst : 
VecInt<[llvm_v4i64_ty], ++ [llvm_v8i32_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++ ++foreach inst = ["xvneg_b", "xvmskltz_b", "xvmskgez_b", "xvmsknz_b", ++ "xvclo_b", "xvclz_b", "xvpcnt_b", ++ "xvreplve0_b", "xvreplve0_q"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v32i8_ty], [llvm_v32i8_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvneg_h", "xvmskltz_h", "xvclo_h", "xvclz_h", "xvpcnt_h", ++ "xvreplve0_h"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], [llvm_v16i16_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvneg_w", "xvmskltz_w", "xvclo_w", "xvclz_w", "xvpcnt_w", ++ "xvreplve0_w"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], [llvm_v8i32_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvneg_d", "xvexth_q_d", "xvexth_qu_du", "xvmskltz_d", ++ "xvextl_q_d", "xvextl_qu_du", "xvclo_d", "xvclz_d", "xvpcnt_d", ++ "xvreplve0_d"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v4i64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvexth_h_b", "xvexth_hu_bu", "vext2xv_h_b", "vext2xv_hu_bu"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], [llvm_v32i8_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvexth_w_h", "xvexth_wu_hu", "vext2xv_w_h", "vext2xv_wu_hu"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], [llvm_v16i16_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvexth_d_w", "xvexth_du_wu", "vext2xv_d_w", "vext2xv_du_wu"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v8i32_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vext2xv_w_b", "vext2xv_wu_bu"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], [llvm_v32i8_ty], ++ [IntrNoMem]>; ++foreach inst = ["vext2xv_d_h", "vext2xv_du_hu"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v16i16_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vext2xv_d_b", "vext2xv_du_bu"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v32i8_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvldi : VecInt<[llvm_v4i64_ty], [llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++def int_loongarch_lasx_xvrepli_b : VecInt<[llvm_v32i8_ty], [llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++def int_loongarch_lasx_xvrepli_h : VecInt<[llvm_v16i16_ty], [llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++def int_loongarch_lasx_xvrepli_w : VecInt<[llvm_v8i32_ty], [llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++def int_loongarch_lasx_xvrepli_d : VecInt<[llvm_v4i64_ty], [llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++ ++def int_loongarch_lasx_xvreplgr2vr_b : VecInt<[llvm_v32i8_ty], [llvm_i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvreplgr2vr_h : VecInt<[llvm_v16i16_ty], [llvm_i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvreplgr2vr_w : VecInt<[llvm_v8i32_ty], [llvm_i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvreplgr2vr_d : VecInt<[llvm_v4i64_ty], [llvm_i64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvinsgr2vr_w ++ : VecInt<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++def int_loongarch_lasx_xvinsgr2vr_d ++ : VecInt<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++ ++def int_loongarch_lasx_xvreplve_b ++ : VecInt<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplve_h ++ : VecInt<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplve_w ++ : VecInt<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplve_d ++ : VecInt<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ 
++foreach inst = ["xvpickve2gr_w", "xvpickve2gr_wu" ] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_i32_ty], ++ [llvm_v8i32_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["xvpickve2gr_d", "xvpickve2gr_du" ] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_i64_ty], ++ [llvm_v4i64_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++ ++def int_loongarch_lasx_xbz_b : VecInt<[llvm_i32_ty], [llvm_v32i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xbz_h : VecInt<[llvm_i32_ty], [llvm_v16i16_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xbz_w : VecInt<[llvm_i32_ty], [llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xbz_d : VecInt<[llvm_i32_ty], [llvm_v4i64_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xbz_v : VecInt<[llvm_i32_ty], [llvm_v32i8_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xbnz_v : VecInt<[llvm_i32_ty], [llvm_v32i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xbnz_b : VecInt<[llvm_i32_ty], [llvm_v32i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xbnz_h : VecInt<[llvm_i32_ty], [llvm_v16i16_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xbnz_w : VecInt<[llvm_i32_ty], [llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xbnz_d : VecInt<[llvm_i32_ty], [llvm_v4i64_ty], ++ [IntrNoMem]>; ++ ++// LASX Float ++ ++foreach inst = ["xvfadd_s", "xvfsub_s", "xvfmul_s", "xvfdiv_s", ++ "xvfmax_s", "xvfmin_s", "xvfmaxa_s", "xvfmina_s"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], ++ [llvm_v8f32_ty, llvm_v8f32_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvfadd_d", "xvfsub_d", "xvfmul_d", "xvfdiv_d", ++ "xvfmax_d", "xvfmin_d", "xvfmaxa_d", "xvfmina_d"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v4f64_ty], ++ [llvm_v4f64_ty, llvm_v4f64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvfmadd_s", "xvfmsub_s", "xvfnmadd_s", "xvfnmsub_s"] in ++ def int_loongarch_lasx_#inst ++ : VecInt<[llvm_v8f32_ty], ++ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvfmadd_d", "xvfmsub_d", "xvfnmadd_d", "xvfnmsub_d"] in ++ def int_loongarch_lasx_#inst ++ : VecInt<[llvm_v4f64_ty], ++ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvflogb_s", "xvfsqrt_s", "xvfrecip_s", "xvfrsqrt_s", "xvfrint_s", ++ "xvfrintrne_s", "xvfrintrz_s", "xvfrintrp_s", "xvfrintrm_s"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], [llvm_v8f32_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvflogb_d", "xvfsqrt_d", "xvfrecip_d", "xvfrsqrt_d", "xvfrint_d", ++ "xvfrintrne_d", "xvfrintrz_d", "xvfrintrp_d", "xvfrintrm_d"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v4f64_ty], [llvm_v4f64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvfcvtl_s_h", "xvfcvth_s_h"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], [llvm_v16i16_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvfcvtl_d_s", "xvfcvth_d_s"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v4f64_ty], [llvm_v8f32_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvftintrne_w_s", "xvftintrz_w_s", "xvftintrp_w_s", "xvftintrm_w_s", ++ "xvftint_w_s", "xvftintrz_wu_s", "xvftint_wu_s", "xvfclass_s"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], [llvm_v8f32_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvftintrne_l_d", "xvftintrz_l_d", "xvftintrp_l_d", "xvftintrm_l_d", ++ "xvftint_l_d", "xvftintrz_lu_d", "xvftint_lu_d", "xvfclass_d"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v4f64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvftintrnel_l_s", "xvftintrneh_l_s", "xvftintrzl_l_s", ++ "xvftintrzh_l_s", "xvftintrpl_l_s", 
"xvftintrph_l_s", ++ "xvftintrml_l_s", "xvftintrmh_l_s", "xvftintl_l_s", ++ "xvftinth_l_s"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v8f32_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvffint_s_w", "xvffint_s_wu"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], [llvm_v8i32_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvffint_d_l", "xvffint_d_lu"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v4f64_ty], [llvm_v4i64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvffintl_d_w", "xvffinth_d_w"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v4f64_ty], [llvm_v8i32_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvffint_s_l"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], ++ [llvm_v4i64_ty, llvm_v4i64_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvftintrne_w_d", "xvftintrz_w_d", "xvftintrp_w_d", "xvftintrm_w_d", ++ "xvftint_w_d"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], ++ [llvm_v4f64_ty, llvm_v4f64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvfcvt_h_s"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], ++ [llvm_v8f32_ty, llvm_v8f32_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvfcvt_s_d"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], ++ [llvm_v4f64_ty, llvm_v4f64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvfcmp_caf_s", "xvfcmp_cun_s", "xvfcmp_ceq_s", "xvfcmp_cueq_s", ++ "xvfcmp_clt_s", "xvfcmp_cult_s", "xvfcmp_cle_s", "xvfcmp_cule_s", ++ "xvfcmp_cne_s", "xvfcmp_cor_s", "xvfcmp_cune_s", ++ "xvfcmp_saf_s", "xvfcmp_sun_s", "xvfcmp_seq_s", "xvfcmp_sueq_s", ++ "xvfcmp_slt_s", "xvfcmp_sult_s", "xvfcmp_sle_s", "xvfcmp_sule_s", ++ "xvfcmp_sne_s", "xvfcmp_sor_s", "xvfcmp_sune_s"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], ++ [llvm_v8f32_ty, llvm_v8f32_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvfcmp_caf_d", "xvfcmp_cun_d", "xvfcmp_ceq_d", "xvfcmp_cueq_d", ++ "xvfcmp_clt_d", "xvfcmp_cult_d", "xvfcmp_cle_d", "xvfcmp_cule_d", ++ "xvfcmp_cne_d", "xvfcmp_cor_d", "xvfcmp_cune_d", ++ "xvfcmp_saf_d", "xvfcmp_sun_d", "xvfcmp_seq_d", "xvfcmp_sueq_d", ++ "xvfcmp_slt_d", "xvfcmp_sult_d", "xvfcmp_sle_d", "xvfcmp_sule_d", ++ "xvfcmp_sne_d", "xvfcmp_sor_d", "xvfcmp_sune_d"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], ++ [llvm_v4f64_ty, llvm_v4f64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvpickve_w_f ++ : VecInt<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++def int_loongarch_lasx_xvpickve_d_f ++ : VecInt<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++ ++// LASX load/store ++def int_loongarch_lasx_xvld ++ : VecInt<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i32_ty], ++ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; ++def int_loongarch_lasx_xvldx ++ : VecInt<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i64_ty], ++ [IntrReadMem, IntrArgMemOnly]>; ++def int_loongarch_lasx_xvldrepl_b ++ : VecInt<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i32_ty], ++ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; ++def int_loongarch_lasx_xvldrepl_h ++ : VecInt<[llvm_v16i16_ty], [llvm_ptr_ty, llvm_i32_ty], ++ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; ++def int_loongarch_lasx_xvldrepl_w ++ : VecInt<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_i32_ty], ++ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; ++def int_loongarch_lasx_xvldrepl_d ++ : VecInt<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_i32_ty], ++ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; ++ ++def int_loongarch_lasx_xvst ++ : VecInt<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i32_ty], ++ [IntrWriteMem, IntrArgMemOnly, ImmArg>]>; ++def int_loongarch_lasx_xvstx ++ : 
VecInt<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i64_ty], ++ [IntrWriteMem, IntrArgMemOnly]>; ++def int_loongarch_lasx_xvstelm_b ++ : VecInt<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], ++ [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; ++def int_loongarch_lasx_xvstelm_h ++ : VecInt<[], [llvm_v16i16_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], ++ [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; ++def int_loongarch_lasx_xvstelm_w ++ : VecInt<[], [llvm_v8i32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], ++ [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; ++def int_loongarch_lasx_xvstelm_d ++ : VecInt<[], [llvm_v4i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], ++ [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; ++} // TargetPrefix = "loongarch" +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index c05133647929..3a40cd06a3eb 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -64,11 +64,17 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + + static const MVT::SimpleValueType LSXVTs[] = { + MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64}; ++ static const MVT::SimpleValueType LASXVTs[] = { ++ MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64}; + + if (Subtarget.hasExtLSX()) + for (MVT VT : LSXVTs) + addRegisterClass(VT, &LoongArch::LSX128RegClass); + ++ if (Subtarget.hasExtLASX()) ++ for (MVT VT : LASXVTs) ++ addRegisterClass(VT, &LoongArch::LASX256RegClass); ++ + setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT, + MVT::i1, Promote); + +@@ -207,6 +213,11 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, + {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8}, Legal); + ++ if (Subtarget.hasExtLASX()) ++ setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, ++ {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}, ++ Legal); ++ + // Compute derived properties from the register classes. 
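The offset operands of the LASX memory intrinsics above carry the same encoding limits as their LSX counterparts, and the lowering code further down (lowerINTRINSIC_W_CHAIN / lowerINTRINSIC_VOID) rejects out-of-range values with the isInt / isShiftedInt / isUInt predicates from llvm/Support/MathExtras.h. Below is a minimal standalone sketch of those checks, not patch code; the constants are illustrative only.

// Sketch only: the range predicates the lowering applies to the LASX
// load/store intrinsics, exercised with illustrative constants.
#include "llvm/Support/MathExtras.h"
#include <cassert>

int main() {
  // xvld / xvst: signed 12-bit byte offset.
  assert(llvm::isInt<12>(-2048) && !llvm::isInt<12>(2048));

  // xvldrepl_h/_w/_d: the offset must also be a multiple of the element
  // size, hence isShiftedInt<11,1>, <10,2> and <9,3> respectively.
  assert(llvm::isShiftedInt<10, 2>(1020));  // multiple of 4, fits the field
  assert(!llvm::isShiftedInt<10, 2>(1022)); // rejected: not a multiple of 4

  // xvstelm_w: scaled 8-bit offset plus a lane index for the 8 i32 lanes.
  assert(llvm::isShiftedInt<8, 2>(-512) && llvm::isUInt<3>(7));
  return 0;
}

For xvstelm the lane index bound shrinks with the element width: 5 bits for the 32 byte lanes down to 2 bits for the 4 doubleword lanes, which is exactly what the isUInt<5>/<4>/<3>/<2> checks in the diff encode.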
+ computeRegisterProperties(Subtarget.getRegisterInfo()); + +@@ -695,9 +706,17 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + case Intrinsic::loongarch_lsx_vpickve2gr_d: + case Intrinsic::loongarch_lsx_vpickve2gr_du: + case Intrinsic::loongarch_lsx_vreplvei_d: ++ case Intrinsic::loongarch_lasx_xvrepl128vei_d: + return checkIntrinsicImmArg<1>(Op, 2, DAG); + case Intrinsic::loongarch_lsx_vreplvei_w: ++ case Intrinsic::loongarch_lasx_xvrepl128vei_w: ++ case Intrinsic::loongarch_lasx_xvpickve2gr_d: ++ case Intrinsic::loongarch_lasx_xvpickve2gr_du: ++ case Intrinsic::loongarch_lasx_xvpickve_d: ++ case Intrinsic::loongarch_lasx_xvpickve_d_f: + return checkIntrinsicImmArg<2>(Op, 2, DAG); ++ case Intrinsic::loongarch_lasx_xvinsve0_d: ++ return checkIntrinsicImmArg<2>(Op, 3, DAG); + case Intrinsic::loongarch_lsx_vsat_b: + case Intrinsic::loongarch_lsx_vsat_bu: + case Intrinsic::loongarch_lsx_vrotri_b: +@@ -706,7 +725,19 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + case Intrinsic::loongarch_lsx_vsrlri_b: + case Intrinsic::loongarch_lsx_vsrari_b: + case Intrinsic::loongarch_lsx_vreplvei_h: ++ case Intrinsic::loongarch_lasx_xvsat_b: ++ case Intrinsic::loongarch_lasx_xvsat_bu: ++ case Intrinsic::loongarch_lasx_xvrotri_b: ++ case Intrinsic::loongarch_lasx_xvsllwil_h_b: ++ case Intrinsic::loongarch_lasx_xvsllwil_hu_bu: ++ case Intrinsic::loongarch_lasx_xvsrlri_b: ++ case Intrinsic::loongarch_lasx_xvsrari_b: ++ case Intrinsic::loongarch_lasx_xvrepl128vei_h: ++ case Intrinsic::loongarch_lasx_xvpickve_w: ++ case Intrinsic::loongarch_lasx_xvpickve_w_f: + return checkIntrinsicImmArg<3>(Op, 2, DAG); ++ case Intrinsic::loongarch_lasx_xvinsve0_w: ++ return checkIntrinsicImmArg<3>(Op, 3, DAG); + case Intrinsic::loongarch_lsx_vsat_h: + case Intrinsic::loongarch_lsx_vsat_hu: + case Intrinsic::loongarch_lsx_vrotri_h: +@@ -715,6 +746,14 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + case Intrinsic::loongarch_lsx_vsrlri_h: + case Intrinsic::loongarch_lsx_vsrari_h: + case Intrinsic::loongarch_lsx_vreplvei_b: ++ case Intrinsic::loongarch_lasx_xvsat_h: ++ case Intrinsic::loongarch_lasx_xvsat_hu: ++ case Intrinsic::loongarch_lasx_xvrotri_h: ++ case Intrinsic::loongarch_lasx_xvsllwil_w_h: ++ case Intrinsic::loongarch_lasx_xvsllwil_wu_hu: ++ case Intrinsic::loongarch_lasx_xvsrlri_h: ++ case Intrinsic::loongarch_lasx_xvsrari_h: ++ case Intrinsic::loongarch_lasx_xvrepl128vei_b: + return checkIntrinsicImmArg<4>(Op, 2, DAG); + case Intrinsic::loongarch_lsx_vsrlni_b_h: + case Intrinsic::loongarch_lsx_vsrani_b_h: +@@ -728,6 +767,18 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + case Intrinsic::loongarch_lsx_vssrarni_b_h: + case Intrinsic::loongarch_lsx_vssrlrni_bu_h: + case Intrinsic::loongarch_lsx_vssrarni_bu_h: ++ case Intrinsic::loongarch_lasx_xvsrlni_b_h: ++ case Intrinsic::loongarch_lasx_xvsrani_b_h: ++ case Intrinsic::loongarch_lasx_xvsrlrni_b_h: ++ case Intrinsic::loongarch_lasx_xvsrarni_b_h: ++ case Intrinsic::loongarch_lasx_xvssrlni_b_h: ++ case Intrinsic::loongarch_lasx_xvssrani_b_h: ++ case Intrinsic::loongarch_lasx_xvssrlni_bu_h: ++ case Intrinsic::loongarch_lasx_xvssrani_bu_h: ++ case Intrinsic::loongarch_lasx_xvssrlrni_b_h: ++ case Intrinsic::loongarch_lasx_xvssrarni_b_h: ++ case Intrinsic::loongarch_lasx_xvssrlrni_bu_h: ++ case Intrinsic::loongarch_lasx_xvssrarni_bu_h: + return checkIntrinsicImmArg<4>(Op, 3, DAG); + case Intrinsic::loongarch_lsx_vsat_w: + case Intrinsic::loongarch_lsx_vsat_wu: +@@ -746,6 +797,23 @@ 
LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + case Intrinsic::loongarch_lsx_vslti_du: + case Intrinsic::loongarch_lsx_vbsll_v: + case Intrinsic::loongarch_lsx_vbsrl_v: ++ case Intrinsic::loongarch_lasx_xvsat_w: ++ case Intrinsic::loongarch_lasx_xvsat_wu: ++ case Intrinsic::loongarch_lasx_xvrotri_w: ++ case Intrinsic::loongarch_lasx_xvsllwil_d_w: ++ case Intrinsic::loongarch_lasx_xvsllwil_du_wu: ++ case Intrinsic::loongarch_lasx_xvsrlri_w: ++ case Intrinsic::loongarch_lasx_xvsrari_w: ++ case Intrinsic::loongarch_lasx_xvslei_bu: ++ case Intrinsic::loongarch_lasx_xvslei_hu: ++ case Intrinsic::loongarch_lasx_xvslei_wu: ++ case Intrinsic::loongarch_lasx_xvslei_du: ++ case Intrinsic::loongarch_lasx_xvslti_bu: ++ case Intrinsic::loongarch_lasx_xvslti_hu: ++ case Intrinsic::loongarch_lasx_xvslti_wu: ++ case Intrinsic::loongarch_lasx_xvslti_du: ++ case Intrinsic::loongarch_lasx_xvbsll_v: ++ case Intrinsic::loongarch_lasx_xvbsrl_v: + return checkIntrinsicImmArg<5>(Op, 2, DAG); + case Intrinsic::loongarch_lsx_vseqi_b: + case Intrinsic::loongarch_lsx_vseqi_h: +@@ -759,6 +827,18 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + case Intrinsic::loongarch_lsx_vslti_h: + case Intrinsic::loongarch_lsx_vslti_w: + case Intrinsic::loongarch_lsx_vslti_d: ++ case Intrinsic::loongarch_lasx_xvseqi_b: ++ case Intrinsic::loongarch_lasx_xvseqi_h: ++ case Intrinsic::loongarch_lasx_xvseqi_w: ++ case Intrinsic::loongarch_lasx_xvseqi_d: ++ case Intrinsic::loongarch_lasx_xvslei_b: ++ case Intrinsic::loongarch_lasx_xvslei_h: ++ case Intrinsic::loongarch_lasx_xvslei_w: ++ case Intrinsic::loongarch_lasx_xvslei_d: ++ case Intrinsic::loongarch_lasx_xvslti_b: ++ case Intrinsic::loongarch_lasx_xvslti_h: ++ case Intrinsic::loongarch_lasx_xvslti_w: ++ case Intrinsic::loongarch_lasx_xvslti_d: + return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true); + case Intrinsic::loongarch_lsx_vsrlni_h_w: + case Intrinsic::loongarch_lsx_vsrani_h_w: +@@ -774,12 +854,31 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + case Intrinsic::loongarch_lsx_vssrarni_hu_w: + case Intrinsic::loongarch_lsx_vfrstpi_b: + case Intrinsic::loongarch_lsx_vfrstpi_h: ++ case Intrinsic::loongarch_lasx_xvsrlni_h_w: ++ case Intrinsic::loongarch_lasx_xvsrani_h_w: ++ case Intrinsic::loongarch_lasx_xvsrlrni_h_w: ++ case Intrinsic::loongarch_lasx_xvsrarni_h_w: ++ case Intrinsic::loongarch_lasx_xvssrlni_h_w: ++ case Intrinsic::loongarch_lasx_xvssrani_h_w: ++ case Intrinsic::loongarch_lasx_xvssrlni_hu_w: ++ case Intrinsic::loongarch_lasx_xvssrani_hu_w: ++ case Intrinsic::loongarch_lasx_xvssrlrni_h_w: ++ case Intrinsic::loongarch_lasx_xvssrarni_h_w: ++ case Intrinsic::loongarch_lasx_xvssrlrni_hu_w: ++ case Intrinsic::loongarch_lasx_xvssrarni_hu_w: ++ case Intrinsic::loongarch_lasx_xvfrstpi_b: ++ case Intrinsic::loongarch_lasx_xvfrstpi_h: + return checkIntrinsicImmArg<5>(Op, 3, DAG); + case Intrinsic::loongarch_lsx_vsat_d: + case Intrinsic::loongarch_lsx_vsat_du: + case Intrinsic::loongarch_lsx_vrotri_d: + case Intrinsic::loongarch_lsx_vsrlri_d: + case Intrinsic::loongarch_lsx_vsrari_d: ++ case Intrinsic::loongarch_lasx_xvsat_d: ++ case Intrinsic::loongarch_lasx_xvsat_du: ++ case Intrinsic::loongarch_lasx_xvrotri_d: ++ case Intrinsic::loongarch_lasx_xvsrlri_d: ++ case Intrinsic::loongarch_lasx_xvsrari_d: + return checkIntrinsicImmArg<6>(Op, 2, DAG); + case Intrinsic::loongarch_lsx_vsrlni_w_d: + case Intrinsic::loongarch_lsx_vsrani_w_d: +@@ -793,6 +892,18 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + 
case Intrinsic::loongarch_lsx_vssrarni_w_d: + case Intrinsic::loongarch_lsx_vssrlrni_wu_d: + case Intrinsic::loongarch_lsx_vssrarni_wu_d: ++ case Intrinsic::loongarch_lasx_xvsrlni_w_d: ++ case Intrinsic::loongarch_lasx_xvsrani_w_d: ++ case Intrinsic::loongarch_lasx_xvsrlrni_w_d: ++ case Intrinsic::loongarch_lasx_xvsrarni_w_d: ++ case Intrinsic::loongarch_lasx_xvssrlni_w_d: ++ case Intrinsic::loongarch_lasx_xvssrani_w_d: ++ case Intrinsic::loongarch_lasx_xvssrlni_wu_d: ++ case Intrinsic::loongarch_lasx_xvssrani_wu_d: ++ case Intrinsic::loongarch_lasx_xvssrlrni_w_d: ++ case Intrinsic::loongarch_lasx_xvssrarni_w_d: ++ case Intrinsic::loongarch_lasx_xvssrlrni_wu_d: ++ case Intrinsic::loongarch_lasx_xvssrarni_wu_d: + return checkIntrinsicImmArg<6>(Op, 3, DAG); + case Intrinsic::loongarch_lsx_vsrlni_d_q: + case Intrinsic::loongarch_lsx_vsrani_d_q: +@@ -806,11 +917,28 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + case Intrinsic::loongarch_lsx_vssrarni_d_q: + case Intrinsic::loongarch_lsx_vssrlrni_du_q: + case Intrinsic::loongarch_lsx_vssrarni_du_q: ++ case Intrinsic::loongarch_lasx_xvsrlni_d_q: ++ case Intrinsic::loongarch_lasx_xvsrani_d_q: ++ case Intrinsic::loongarch_lasx_xvsrlrni_d_q: ++ case Intrinsic::loongarch_lasx_xvsrarni_d_q: ++ case Intrinsic::loongarch_lasx_xvssrlni_d_q: ++ case Intrinsic::loongarch_lasx_xvssrani_d_q: ++ case Intrinsic::loongarch_lasx_xvssrlni_du_q: ++ case Intrinsic::loongarch_lasx_xvssrani_du_q: ++ case Intrinsic::loongarch_lasx_xvssrlrni_d_q: ++ case Intrinsic::loongarch_lasx_xvssrarni_d_q: ++ case Intrinsic::loongarch_lasx_xvssrlrni_du_q: ++ case Intrinsic::loongarch_lasx_xvssrarni_du_q: + return checkIntrinsicImmArg<7>(Op, 3, DAG); + case Intrinsic::loongarch_lsx_vnori_b: + case Intrinsic::loongarch_lsx_vshuf4i_b: + case Intrinsic::loongarch_lsx_vshuf4i_h: + case Intrinsic::loongarch_lsx_vshuf4i_w: ++ case Intrinsic::loongarch_lasx_xvnori_b: ++ case Intrinsic::loongarch_lasx_xvshuf4i_b: ++ case Intrinsic::loongarch_lasx_xvshuf4i_h: ++ case Intrinsic::loongarch_lasx_xvshuf4i_w: ++ case Intrinsic::loongarch_lasx_xvpermi_d: + return checkIntrinsicImmArg<8>(Op, 2, DAG); + case Intrinsic::loongarch_lsx_vshuf4i_d: + case Intrinsic::loongarch_lsx_vpermi_w: +@@ -819,13 +947,26 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + case Intrinsic::loongarch_lsx_vextrins_h: + case Intrinsic::loongarch_lsx_vextrins_w: + case Intrinsic::loongarch_lsx_vextrins_d: ++ case Intrinsic::loongarch_lasx_xvshuf4i_d: ++ case Intrinsic::loongarch_lasx_xvpermi_w: ++ case Intrinsic::loongarch_lasx_xvpermi_q: ++ case Intrinsic::loongarch_lasx_xvbitseli_b: ++ case Intrinsic::loongarch_lasx_xvextrins_b: ++ case Intrinsic::loongarch_lasx_xvextrins_h: ++ case Intrinsic::loongarch_lasx_xvextrins_w: ++ case Intrinsic::loongarch_lasx_xvextrins_d: + return checkIntrinsicImmArg<8>(Op, 3, DAG); + case Intrinsic::loongarch_lsx_vrepli_b: + case Intrinsic::loongarch_lsx_vrepli_h: + case Intrinsic::loongarch_lsx_vrepli_w: + case Intrinsic::loongarch_lsx_vrepli_d: ++ case Intrinsic::loongarch_lasx_xvrepli_b: ++ case Intrinsic::loongarch_lasx_xvrepli_h: ++ case Intrinsic::loongarch_lasx_xvrepli_w: ++ case Intrinsic::loongarch_lasx_xvrepli_d: + return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true); + case Intrinsic::loongarch_lsx_vldi: ++ case Intrinsic::loongarch_lasx_xvldi: + return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true); + } + } +@@ -924,22 +1065,27 @@ LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, + } + case 
Intrinsic::loongarch_lsx_vld: + case Intrinsic::loongarch_lsx_vldrepl_b: ++ case Intrinsic::loongarch_lasx_xvld: ++ case Intrinsic::loongarch_lasx_xvldrepl_b: + return !isInt<12>(cast(Op.getOperand(3))->getSExtValue()) + ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) + : SDValue(); + case Intrinsic::loongarch_lsx_vldrepl_h: ++ case Intrinsic::loongarch_lasx_xvldrepl_h: + return !isShiftedInt<11, 1>( + cast(Op.getOperand(3))->getSExtValue()) + ? emitIntrinsicWithChainErrorMessage( + Op, "argument out of range or not a multiple of 2", DAG) + : SDValue(); + case Intrinsic::loongarch_lsx_vldrepl_w: ++ case Intrinsic::loongarch_lasx_xvldrepl_w: + return !isShiftedInt<10, 2>( + cast(Op.getOperand(3))->getSExtValue()) + ? emitIntrinsicWithChainErrorMessage( + Op, "argument out of range or not a multiple of 4", DAG) + : SDValue(); + case Intrinsic::loongarch_lsx_vldrepl_d: ++ case Intrinsic::loongarch_lasx_xvldrepl_d: + return !isShiftedInt<9, 3>( + cast(Op.getOperand(3))->getSExtValue()) + ? emitIntrinsicWithChainErrorMessage( +@@ -1064,14 +1210,27 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, + : Op; + } + case Intrinsic::loongarch_lsx_vst: ++ case Intrinsic::loongarch_lasx_xvst: + return !isInt<12>(cast(Op.getOperand(4))->getSExtValue()) + ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) + : SDValue(); ++ case Intrinsic::loongarch_lasx_xvstelm_b: ++ return (!isInt<8>(cast(Op.getOperand(4))->getSExtValue()) || ++ !isUInt<5>(cast(Op.getOperand(5))->getZExtValue())) ++ ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) ++ : SDValue(); + case Intrinsic::loongarch_lsx_vstelm_b: + return (!isInt<8>(cast(Op.getOperand(4))->getSExtValue()) || + !isUInt<4>(cast(Op.getOperand(5))->getZExtValue())) + ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) + : SDValue(); ++ case Intrinsic::loongarch_lasx_xvstelm_h: ++ return (!isShiftedInt<8, 1>( ++ cast(Op.getOperand(4))->getSExtValue()) || ++ !isUInt<4>(cast(Op.getOperand(5))->getZExtValue())) ++ ? emitIntrinsicErrorMessage( ++ Op, "argument out of range or not a multiple of 2", DAG) ++ : SDValue(); + case Intrinsic::loongarch_lsx_vstelm_h: + return (!isShiftedInt<8, 1>( + cast(Op.getOperand(4))->getSExtValue()) || +@@ -1079,6 +1238,13 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, + ? emitIntrinsicErrorMessage( + Op, "argument out of range or not a multiple of 2", DAG) + : SDValue(); ++ case Intrinsic::loongarch_lasx_xvstelm_w: ++ return (!isShiftedInt<8, 2>( ++ cast(Op.getOperand(4))->getSExtValue()) || ++ !isUInt<3>(cast(Op.getOperand(5))->getZExtValue())) ++ ? emitIntrinsicErrorMessage( ++ Op, "argument out of range or not a multiple of 4", DAG) ++ : SDValue(); + case Intrinsic::loongarch_lsx_vstelm_w: + return (!isShiftedInt<8, 2>( + cast(Op.getOperand(4))->getSExtValue()) || +@@ -1086,6 +1252,13 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, + ? emitIntrinsicErrorMessage( + Op, "argument out of range or not a multiple of 4", DAG) + : SDValue(); ++ case Intrinsic::loongarch_lasx_xvstelm_d: ++ return (!isShiftedInt<8, 3>( ++ cast(Op.getOperand(4))->getSExtValue()) || ++ !isUInt<2>(cast(Op.getOperand(5))->getZExtValue())) ++ ? 
emitIntrinsicErrorMessage( ++ Op, "argument out of range or not a multiple of 8", DAG) ++ : SDValue(); + case Intrinsic::loongarch_lsx_vstelm_d: + return (!isShiftedInt<8, 3>( + cast(Op.getOperand(4))->getSExtValue()) || +@@ -1304,6 +1477,7 @@ replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl &Results, + LoongArchISD::VPICK_SEXT_ELT); + break; + case Intrinsic::loongarch_lsx_vpickve2gr_h: ++ case Intrinsic::loongarch_lasx_xvpickve2gr_w: + replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget, + LoongArchISD::VPICK_SEXT_ELT); + break; +@@ -1316,6 +1490,7 @@ replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl &Results, + LoongArchISD::VPICK_ZEXT_ELT); + break; + case Intrinsic::loongarch_lsx_vpickve2gr_hu: ++ case Intrinsic::loongarch_lasx_xvpickve2gr_wu: + replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget, + LoongArchISD::VPICK_ZEXT_ELT); + break; +@@ -1327,10 +1502,15 @@ replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl &Results, + case Intrinsic::loongarch_lsx_bz_h: + case Intrinsic::loongarch_lsx_bz_w: + case Intrinsic::loongarch_lsx_bz_d: ++ case Intrinsic::loongarch_lasx_xbz_b: ++ case Intrinsic::loongarch_lasx_xbz_h: ++ case Intrinsic::loongarch_lasx_xbz_w: ++ case Intrinsic::loongarch_lasx_xbz_d: + replaceVecCondBranchResults(N, Results, DAG, Subtarget, + LoongArchISD::VALL_ZERO); + break; + case Intrinsic::loongarch_lsx_bz_v: ++ case Intrinsic::loongarch_lasx_xbz_v: + replaceVecCondBranchResults(N, Results, DAG, Subtarget, + LoongArchISD::VANY_ZERO); + break; +@@ -1338,10 +1518,15 @@ replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl &Results, + case Intrinsic::loongarch_lsx_bnz_h: + case Intrinsic::loongarch_lsx_bnz_w: + case Intrinsic::loongarch_lsx_bnz_d: ++ case Intrinsic::loongarch_lasx_xbnz_b: ++ case Intrinsic::loongarch_lasx_xbnz_h: ++ case Intrinsic::loongarch_lasx_xbnz_w: ++ case Intrinsic::loongarch_lasx_xbnz_d: + replaceVecCondBranchResults(N, Results, DAG, Subtarget, + LoongArchISD::VALL_NONZERO); + break; + case Intrinsic::loongarch_lsx_bnz_v: ++ case Intrinsic::loongarch_lasx_xbnz_v: + replaceVecCondBranchResults(N, Results, DAG, Subtarget, + LoongArchISD::VANY_NONZERO); + break; +@@ -2114,30 +2299,50 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, + case Intrinsic::loongarch_lsx_vadd_h: + case Intrinsic::loongarch_lsx_vadd_w: + case Intrinsic::loongarch_lsx_vadd_d: ++ case Intrinsic::loongarch_lasx_xvadd_b: ++ case Intrinsic::loongarch_lasx_xvadd_h: ++ case Intrinsic::loongarch_lasx_xvadd_w: ++ case Intrinsic::loongarch_lasx_xvadd_d: + return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vaddi_bu: + case Intrinsic::loongarch_lsx_vaddi_hu: + case Intrinsic::loongarch_lsx_vaddi_wu: + case Intrinsic::loongarch_lsx_vaddi_du: ++ case Intrinsic::loongarch_lasx_xvaddi_bu: ++ case Intrinsic::loongarch_lasx_xvaddi_hu: ++ case Intrinsic::loongarch_lasx_xvaddi_wu: ++ case Intrinsic::loongarch_lasx_xvaddi_du: + return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<5>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsub_b: + case Intrinsic::loongarch_lsx_vsub_h: + case Intrinsic::loongarch_lsx_vsub_w: + case Intrinsic::loongarch_lsx_vsub_d: ++ case Intrinsic::loongarch_lasx_xvsub_b: ++ case Intrinsic::loongarch_lasx_xvsub_h: ++ case Intrinsic::loongarch_lasx_xvsub_w: ++ case Intrinsic::loongarch_lasx_xvsub_d: + return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case 
Intrinsic::loongarch_lsx_vsubi_bu: + case Intrinsic::loongarch_lsx_vsubi_hu: + case Intrinsic::loongarch_lsx_vsubi_wu: + case Intrinsic::loongarch_lsx_vsubi_du: ++ case Intrinsic::loongarch_lasx_xvsubi_bu: ++ case Intrinsic::loongarch_lasx_xvsubi_hu: ++ case Intrinsic::loongarch_lasx_xvsubi_wu: ++ case Intrinsic::loongarch_lasx_xvsubi_du: + return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<5>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vneg_b: + case Intrinsic::loongarch_lsx_vneg_h: + case Intrinsic::loongarch_lsx_vneg_w: + case Intrinsic::loongarch_lsx_vneg_d: ++ case Intrinsic::loongarch_lasx_xvneg_b: ++ case Intrinsic::loongarch_lasx_xvneg_h: ++ case Intrinsic::loongarch_lasx_xvneg_w: ++ case Intrinsic::loongarch_lasx_xvneg_d: + return DAG.getNode( + ISD::SUB, DL, N->getValueType(0), + DAG.getConstant( +@@ -2149,60 +2354,100 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, + case Intrinsic::loongarch_lsx_vmax_h: + case Intrinsic::loongarch_lsx_vmax_w: + case Intrinsic::loongarch_lsx_vmax_d: ++ case Intrinsic::loongarch_lasx_xvmax_b: ++ case Intrinsic::loongarch_lasx_xvmax_h: ++ case Intrinsic::loongarch_lasx_xvmax_w: ++ case Intrinsic::loongarch_lasx_xvmax_d: + return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vmax_bu: + case Intrinsic::loongarch_lsx_vmax_hu: + case Intrinsic::loongarch_lsx_vmax_wu: + case Intrinsic::loongarch_lsx_vmax_du: ++ case Intrinsic::loongarch_lasx_xvmax_bu: ++ case Intrinsic::loongarch_lasx_xvmax_hu: ++ case Intrinsic::loongarch_lasx_xvmax_wu: ++ case Intrinsic::loongarch_lasx_xvmax_du: + return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vmaxi_b: + case Intrinsic::loongarch_lsx_vmaxi_h: + case Intrinsic::loongarch_lsx_vmaxi_w: + case Intrinsic::loongarch_lsx_vmaxi_d: ++ case Intrinsic::loongarch_lasx_xvmaxi_b: ++ case Intrinsic::loongarch_lasx_xvmaxi_h: ++ case Intrinsic::loongarch_lasx_xvmaxi_w: ++ case Intrinsic::loongarch_lasx_xvmaxi_d: + return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true)); + case Intrinsic::loongarch_lsx_vmaxi_bu: + case Intrinsic::loongarch_lsx_vmaxi_hu: + case Intrinsic::loongarch_lsx_vmaxi_wu: + case Intrinsic::loongarch_lsx_vmaxi_du: ++ case Intrinsic::loongarch_lasx_xvmaxi_bu: ++ case Intrinsic::loongarch_lasx_xvmaxi_hu: ++ case Intrinsic::loongarch_lasx_xvmaxi_wu: ++ case Intrinsic::loongarch_lasx_xvmaxi_du: + return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<5>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vmin_b: + case Intrinsic::loongarch_lsx_vmin_h: + case Intrinsic::loongarch_lsx_vmin_w: + case Intrinsic::loongarch_lsx_vmin_d: ++ case Intrinsic::loongarch_lasx_xvmin_b: ++ case Intrinsic::loongarch_lasx_xvmin_h: ++ case Intrinsic::loongarch_lasx_xvmin_w: ++ case Intrinsic::loongarch_lasx_xvmin_d: + return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vmin_bu: + case Intrinsic::loongarch_lsx_vmin_hu: + case Intrinsic::loongarch_lsx_vmin_wu: + case Intrinsic::loongarch_lsx_vmin_du: ++ case Intrinsic::loongarch_lasx_xvmin_bu: ++ case Intrinsic::loongarch_lasx_xvmin_hu: ++ case Intrinsic::loongarch_lasx_xvmin_wu: ++ case Intrinsic::loongarch_lasx_xvmin_du: + return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1), + 
N->getOperand(2)); + case Intrinsic::loongarch_lsx_vmini_b: + case Intrinsic::loongarch_lsx_vmini_h: + case Intrinsic::loongarch_lsx_vmini_w: + case Intrinsic::loongarch_lsx_vmini_d: ++ case Intrinsic::loongarch_lasx_xvmini_b: ++ case Intrinsic::loongarch_lasx_xvmini_h: ++ case Intrinsic::loongarch_lasx_xvmini_w: ++ case Intrinsic::loongarch_lasx_xvmini_d: + return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true)); + case Intrinsic::loongarch_lsx_vmini_bu: + case Intrinsic::loongarch_lsx_vmini_hu: + case Intrinsic::loongarch_lsx_vmini_wu: + case Intrinsic::loongarch_lsx_vmini_du: ++ case Intrinsic::loongarch_lasx_xvmini_bu: ++ case Intrinsic::loongarch_lasx_xvmini_hu: ++ case Intrinsic::loongarch_lasx_xvmini_wu: ++ case Intrinsic::loongarch_lasx_xvmini_du: + return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<5>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vmul_b: + case Intrinsic::loongarch_lsx_vmul_h: + case Intrinsic::loongarch_lsx_vmul_w: + case Intrinsic::loongarch_lsx_vmul_d: ++ case Intrinsic::loongarch_lasx_xvmul_b: ++ case Intrinsic::loongarch_lasx_xvmul_h: ++ case Intrinsic::loongarch_lasx_xvmul_w: ++ case Intrinsic::loongarch_lasx_xvmul_d: + return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vmadd_b: + case Intrinsic::loongarch_lsx_vmadd_h: + case Intrinsic::loongarch_lsx_vmadd_w: +- case Intrinsic::loongarch_lsx_vmadd_d: { ++ case Intrinsic::loongarch_lsx_vmadd_d: ++ case Intrinsic::loongarch_lasx_xvmadd_b: ++ case Intrinsic::loongarch_lasx_xvmadd_h: ++ case Intrinsic::loongarch_lasx_xvmadd_w: ++ case Intrinsic::loongarch_lasx_xvmadd_d: { + EVT ResTy = N->getValueType(0); + return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1), + DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2), +@@ -2211,7 +2456,11 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, + case Intrinsic::loongarch_lsx_vmsub_b: + case Intrinsic::loongarch_lsx_vmsub_h: + case Intrinsic::loongarch_lsx_vmsub_w: +- case Intrinsic::loongarch_lsx_vmsub_d: { ++ case Intrinsic::loongarch_lsx_vmsub_d: ++ case Intrinsic::loongarch_lasx_xvmsub_b: ++ case Intrinsic::loongarch_lasx_xvmsub_h: ++ case Intrinsic::loongarch_lasx_xvmsub_w: ++ case Intrinsic::loongarch_lasx_xvmsub_d: { + EVT ResTy = N->getValueType(0); + return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1), + DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2), +@@ -2221,125 +2470,188 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, + case Intrinsic::loongarch_lsx_vdiv_h: + case Intrinsic::loongarch_lsx_vdiv_w: + case Intrinsic::loongarch_lsx_vdiv_d: ++ case Intrinsic::loongarch_lasx_xvdiv_b: ++ case Intrinsic::loongarch_lasx_xvdiv_h: ++ case Intrinsic::loongarch_lasx_xvdiv_w: ++ case Intrinsic::loongarch_lasx_xvdiv_d: + return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vdiv_bu: + case Intrinsic::loongarch_lsx_vdiv_hu: + case Intrinsic::loongarch_lsx_vdiv_wu: + case Intrinsic::loongarch_lsx_vdiv_du: ++ case Intrinsic::loongarch_lasx_xvdiv_bu: ++ case Intrinsic::loongarch_lasx_xvdiv_hu: ++ case Intrinsic::loongarch_lasx_xvdiv_wu: ++ case Intrinsic::loongarch_lasx_xvdiv_du: + return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vmod_b: + case Intrinsic::loongarch_lsx_vmod_h: + case 
Intrinsic::loongarch_lsx_vmod_w: + case Intrinsic::loongarch_lsx_vmod_d: ++ case Intrinsic::loongarch_lasx_xvmod_b: ++ case Intrinsic::loongarch_lasx_xvmod_h: ++ case Intrinsic::loongarch_lasx_xvmod_w: ++ case Intrinsic::loongarch_lasx_xvmod_d: + return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vmod_bu: + case Intrinsic::loongarch_lsx_vmod_hu: + case Intrinsic::loongarch_lsx_vmod_wu: + case Intrinsic::loongarch_lsx_vmod_du: ++ case Intrinsic::loongarch_lasx_xvmod_bu: ++ case Intrinsic::loongarch_lasx_xvmod_hu: ++ case Intrinsic::loongarch_lasx_xvmod_wu: ++ case Intrinsic::loongarch_lasx_xvmod_du: + return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vand_v: ++ case Intrinsic::loongarch_lasx_xvand_v: + return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vor_v: ++ case Intrinsic::loongarch_lasx_xvor_v: + return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vxor_v: ++ case Intrinsic::loongarch_lasx_xvxor_v: + return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); +- case Intrinsic::loongarch_lsx_vnor_v: { ++ case Intrinsic::loongarch_lsx_vnor_v: ++ case Intrinsic::loongarch_lasx_xvnor_v: { + SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + return DAG.getNOT(DL, Res, Res->getValueType(0)); + } + case Intrinsic::loongarch_lsx_vandi_b: ++ case Intrinsic::loongarch_lasx_xvandi_b: + return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<8>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vori_b: ++ case Intrinsic::loongarch_lasx_xvori_b: + return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<8>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vxori_b: ++ case Intrinsic::loongarch_lasx_xvxori_b: + return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<8>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsll_b: + case Intrinsic::loongarch_lsx_vsll_h: + case Intrinsic::loongarch_lsx_vsll_w: + case Intrinsic::loongarch_lsx_vsll_d: ++ case Intrinsic::loongarch_lasx_xvsll_b: ++ case Intrinsic::loongarch_lasx_xvsll_h: ++ case Intrinsic::loongarch_lasx_xvsll_w: ++ case Intrinsic::loongarch_lasx_xvsll_d: + return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), + truncateVecElts(N, DAG)); + case Intrinsic::loongarch_lsx_vslli_b: ++ case Intrinsic::loongarch_lasx_xvslli_b: + return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<3>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vslli_h: ++ case Intrinsic::loongarch_lasx_xvslli_h: + return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<4>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vslli_w: ++ case Intrinsic::loongarch_lasx_xvslli_w: + return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<5>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vslli_d: ++ case Intrinsic::loongarch_lasx_xvslli_d: + return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<6>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsrl_b: + case Intrinsic::loongarch_lsx_vsrl_h: + case Intrinsic::loongarch_lsx_vsrl_w: + case Intrinsic::loongarch_lsx_vsrl_d: ++ 
case Intrinsic::loongarch_lasx_xvsrl_b: ++ case Intrinsic::loongarch_lasx_xvsrl_h: ++ case Intrinsic::loongarch_lasx_xvsrl_w: ++ case Intrinsic::loongarch_lasx_xvsrl_d: + return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), + truncateVecElts(N, DAG)); + case Intrinsic::loongarch_lsx_vsrli_b: ++ case Intrinsic::loongarch_lasx_xvsrli_b: + return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<3>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsrli_h: ++ case Intrinsic::loongarch_lasx_xvsrli_h: + return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<4>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsrli_w: ++ case Intrinsic::loongarch_lasx_xvsrli_w: + return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<5>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsrli_d: ++ case Intrinsic::loongarch_lasx_xvsrli_d: + return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<6>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsra_b: + case Intrinsic::loongarch_lsx_vsra_h: + case Intrinsic::loongarch_lsx_vsra_w: + case Intrinsic::loongarch_lsx_vsra_d: ++ case Intrinsic::loongarch_lasx_xvsra_b: ++ case Intrinsic::loongarch_lasx_xvsra_h: ++ case Intrinsic::loongarch_lasx_xvsra_w: ++ case Intrinsic::loongarch_lasx_xvsra_d: + return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), + truncateVecElts(N, DAG)); + case Intrinsic::loongarch_lsx_vsrai_b: ++ case Intrinsic::loongarch_lasx_xvsrai_b: + return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<3>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsrai_h: ++ case Intrinsic::loongarch_lasx_xvsrai_h: + return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<4>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsrai_w: ++ case Intrinsic::loongarch_lasx_xvsrai_w: + return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<5>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsrai_d: ++ case Intrinsic::loongarch_lasx_xvsrai_d: + return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<6>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vpcnt_b: + case Intrinsic::loongarch_lsx_vpcnt_h: + case Intrinsic::loongarch_lsx_vpcnt_w: + case Intrinsic::loongarch_lsx_vpcnt_d: ++ case Intrinsic::loongarch_lasx_xvpcnt_b: ++ case Intrinsic::loongarch_lasx_xvpcnt_h: ++ case Intrinsic::loongarch_lasx_xvpcnt_w: ++ case Intrinsic::loongarch_lasx_xvpcnt_d: + return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1)); + case Intrinsic::loongarch_lsx_vbitclr_b: + case Intrinsic::loongarch_lsx_vbitclr_h: + case Intrinsic::loongarch_lsx_vbitclr_w: + case Intrinsic::loongarch_lsx_vbitclr_d: ++ case Intrinsic::loongarch_lasx_xvbitclr_b: ++ case Intrinsic::loongarch_lasx_xvbitclr_h: ++ case Intrinsic::loongarch_lasx_xvbitclr_w: ++ case Intrinsic::loongarch_lasx_xvbitclr_d: + return lowerVectorBitClear(N, DAG); + case Intrinsic::loongarch_lsx_vbitclri_b: ++ case Intrinsic::loongarch_lasx_xvbitclri_b: + return lowerVectorBitClearImm<3>(N, DAG); + case Intrinsic::loongarch_lsx_vbitclri_h: ++ case Intrinsic::loongarch_lasx_xvbitclri_h: + return lowerVectorBitClearImm<4>(N, DAG); + case Intrinsic::loongarch_lsx_vbitclri_w: ++ case Intrinsic::loongarch_lasx_xvbitclri_w: + return lowerVectorBitClearImm<5>(N, DAG); + case Intrinsic::loongarch_lsx_vbitclri_d: ++ case 
Intrinsic::loongarch_lasx_xvbitclri_d: + return lowerVectorBitClearImm<6>(N, DAG); + case Intrinsic::loongarch_lsx_vbitset_b: + case Intrinsic::loongarch_lsx_vbitset_h: + case Intrinsic::loongarch_lsx_vbitset_w: +- case Intrinsic::loongarch_lsx_vbitset_d: { ++ case Intrinsic::loongarch_lsx_vbitset_d: ++ case Intrinsic::loongarch_lasx_xvbitset_b: ++ case Intrinsic::loongarch_lasx_xvbitset_h: ++ case Intrinsic::loongarch_lasx_xvbitset_w: ++ case Intrinsic::loongarch_lasx_xvbitset_d: { + EVT VecTy = N->getValueType(0); + SDValue One = DAG.getConstant(1, DL, VecTy); + return DAG.getNode( +@@ -2347,17 +2659,25 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, + DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG))); + } + case Intrinsic::loongarch_lsx_vbitseti_b: ++ case Intrinsic::loongarch_lasx_xvbitseti_b: + return lowerVectorBitSetImm<3>(N, DAG); + case Intrinsic::loongarch_lsx_vbitseti_h: ++ case Intrinsic::loongarch_lasx_xvbitseti_h: + return lowerVectorBitSetImm<4>(N, DAG); + case Intrinsic::loongarch_lsx_vbitseti_w: ++ case Intrinsic::loongarch_lasx_xvbitseti_w: + return lowerVectorBitSetImm<5>(N, DAG); + case Intrinsic::loongarch_lsx_vbitseti_d: ++ case Intrinsic::loongarch_lasx_xvbitseti_d: + return lowerVectorBitSetImm<6>(N, DAG); + case Intrinsic::loongarch_lsx_vbitrev_b: + case Intrinsic::loongarch_lsx_vbitrev_h: + case Intrinsic::loongarch_lsx_vbitrev_w: +- case Intrinsic::loongarch_lsx_vbitrev_d: { ++ case Intrinsic::loongarch_lsx_vbitrev_d: ++ case Intrinsic::loongarch_lasx_xvbitrev_b: ++ case Intrinsic::loongarch_lasx_xvbitrev_h: ++ case Intrinsic::loongarch_lasx_xvbitrev_w: ++ case Intrinsic::loongarch_lasx_xvbitrev_d: { + EVT VecTy = N->getValueType(0); + SDValue One = DAG.getConstant(1, DL, VecTy); + return DAG.getNode( +@@ -2365,31 +2685,45 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, + DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG))); + } + case Intrinsic::loongarch_lsx_vbitrevi_b: ++ case Intrinsic::loongarch_lasx_xvbitrevi_b: + return lowerVectorBitRevImm<3>(N, DAG); + case Intrinsic::loongarch_lsx_vbitrevi_h: ++ case Intrinsic::loongarch_lasx_xvbitrevi_h: + return lowerVectorBitRevImm<4>(N, DAG); + case Intrinsic::loongarch_lsx_vbitrevi_w: ++ case Intrinsic::loongarch_lasx_xvbitrevi_w: + return lowerVectorBitRevImm<5>(N, DAG); + case Intrinsic::loongarch_lsx_vbitrevi_d: ++ case Intrinsic::loongarch_lasx_xvbitrevi_d: + return lowerVectorBitRevImm<6>(N, DAG); + case Intrinsic::loongarch_lsx_vfadd_s: + case Intrinsic::loongarch_lsx_vfadd_d: ++ case Intrinsic::loongarch_lasx_xvfadd_s: ++ case Intrinsic::loongarch_lasx_xvfadd_d: + return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vfsub_s: + case Intrinsic::loongarch_lsx_vfsub_d: ++ case Intrinsic::loongarch_lasx_xvfsub_s: ++ case Intrinsic::loongarch_lasx_xvfsub_d: + return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vfmul_s: + case Intrinsic::loongarch_lsx_vfmul_d: ++ case Intrinsic::loongarch_lasx_xvfmul_s: ++ case Intrinsic::loongarch_lasx_xvfmul_d: + return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vfdiv_s: + case Intrinsic::loongarch_lsx_vfdiv_d: ++ case Intrinsic::loongarch_lasx_xvfdiv_s: ++ case Intrinsic::loongarch_lasx_xvfdiv_d: + return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case 
Intrinsic::loongarch_lsx_vfmadd_s: + case Intrinsic::loongarch_lsx_vfmadd_d: ++ case Intrinsic::loongarch_lasx_xvfmadd_s: ++ case Intrinsic::loongarch_lasx_xvfmadd_d: + return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2), N->getOperand(3)); + case Intrinsic::loongarch_lsx_vinsgr2vr_b: +@@ -2397,10 +2731,12 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, + N->getOperand(1), N->getOperand(2), + legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget)); + case Intrinsic::loongarch_lsx_vinsgr2vr_h: ++ case Intrinsic::loongarch_lasx_xvinsgr2vr_w: + return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), + N->getOperand(1), N->getOperand(2), + legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget)); + case Intrinsic::loongarch_lsx_vinsgr2vr_w: ++ case Intrinsic::loongarch_lasx_xvinsgr2vr_d: + return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), + N->getOperand(1), N->getOperand(2), + legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget)); +@@ -2411,7 +2747,11 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, + case Intrinsic::loongarch_lsx_vreplgr2vr_b: + case Intrinsic::loongarch_lsx_vreplgr2vr_h: + case Intrinsic::loongarch_lsx_vreplgr2vr_w: +- case Intrinsic::loongarch_lsx_vreplgr2vr_d: { ++ case Intrinsic::loongarch_lsx_vreplgr2vr_d: ++ case Intrinsic::loongarch_lasx_xvreplgr2vr_b: ++ case Intrinsic::loongarch_lasx_xvreplgr2vr_h: ++ case Intrinsic::loongarch_lasx_xvreplgr2vr_w: ++ case Intrinsic::loongarch_lasx_xvreplgr2vr_d: { + EVT ResTy = N->getValueType(0); + SmallVector Ops(ResTy.getVectorNumElements(), N->getOperand(1)); + return DAG.getBuildVector(ResTy, DL, Ops); +@@ -2420,6 +2760,10 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, + case Intrinsic::loongarch_lsx_vreplve_h: + case Intrinsic::loongarch_lsx_vreplve_w: + case Intrinsic::loongarch_lsx_vreplve_d: ++ case Intrinsic::loongarch_lasx_xvreplve_b: ++ case Intrinsic::loongarch_lasx_xvreplve_h: ++ case Intrinsic::loongarch_lasx_xvreplve_w: ++ case Intrinsic::loongarch_lasx_xvreplve_d: + return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0), + N->getOperand(1), + DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(), +@@ -2534,6 +2878,36 @@ emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, + case LoongArch::PseudoVBNZ_D: + CondOpc = LoongArch::VSETALLNEZ_D; + break; ++ case LoongArch::PseudoXVBZ: ++ CondOpc = LoongArch::XVSETEQZ_V; ++ break; ++ case LoongArch::PseudoXVBZ_B: ++ CondOpc = LoongArch::XVSETANYEQZ_B; ++ break; ++ case LoongArch::PseudoXVBZ_H: ++ CondOpc = LoongArch::XVSETANYEQZ_H; ++ break; ++ case LoongArch::PseudoXVBZ_W: ++ CondOpc = LoongArch::XVSETANYEQZ_W; ++ break; ++ case LoongArch::PseudoXVBZ_D: ++ CondOpc = LoongArch::XVSETANYEQZ_D; ++ break; ++ case LoongArch::PseudoXVBNZ: ++ CondOpc = LoongArch::XVSETNEZ_V; ++ break; ++ case LoongArch::PseudoXVBNZ_B: ++ CondOpc = LoongArch::XVSETALLNEZ_B; ++ break; ++ case LoongArch::PseudoXVBNZ_H: ++ CondOpc = LoongArch::XVSETALLNEZ_H; ++ break; ++ case LoongArch::PseudoXVBNZ_W: ++ CondOpc = LoongArch::XVSETALLNEZ_W; ++ break; ++ case LoongArch::PseudoXVBNZ_D: ++ CondOpc = LoongArch::XVSETALLNEZ_D; ++ break; + } + + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); +@@ -2636,6 +3010,16 @@ MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( + case LoongArch::PseudoVBNZ_H: + case LoongArch::PseudoVBNZ_W: + case LoongArch::PseudoVBNZ_D: ++ case LoongArch::PseudoXVBZ: ++ case LoongArch::PseudoXVBZ_B: ++ case 
LoongArch::PseudoXVBZ_H: ++ case LoongArch::PseudoXVBZ_W: ++ case LoongArch::PseudoXVBZ_D: ++ case LoongArch::PseudoXVBNZ: ++ case LoongArch::PseudoXVBNZ_B: ++ case LoongArch::PseudoXVBNZ_H: ++ case LoongArch::PseudoXVBNZ_W: ++ case LoongArch::PseudoXVBNZ_D: + return emitVecCondBranchPseudo(MI, BB, Subtarget); + } + } +@@ -2746,6 +3130,10 @@ const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2, + LoongArch::VR3, LoongArch::VR4, LoongArch::VR5, + LoongArch::VR6, LoongArch::VR7}; + ++const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2, ++ LoongArch::XR3, LoongArch::XR4, LoongArch::XR5, ++ LoongArch::XR6, LoongArch::XR7}; ++ + // Pass a 2*GRLen argument that has been split into two GRLen values through + // registers or the stack as necessary. + static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, +@@ -2894,6 +3282,8 @@ static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, + Reg = State.AllocateReg(ArgFPR64s); + else if (ValVT.is128BitVector()) + Reg = State.AllocateReg(ArgVRs); ++ else if (ValVT.is256BitVector()) ++ Reg = State.AllocateReg(ArgXRs); + else + Reg = State.AllocateReg(ArgGPRs); + +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +index a5d66ebac96a..ddd1c9943fac 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +@@ -55,6 +55,14 @@ void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + return; + } + ++ // XR->XR copies. ++ if (LoongArch::LASX256RegClass.contains(DstReg, SrcReg)) { ++ BuildMI(MBB, MBBI, DL, get(LoongArch::XVORI_B), DstReg) ++ .addReg(SrcReg, getKillRegState(KillSrc)) ++ .addImm(0); ++ return; ++ } ++ + // GPR->CFR copy. 
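As the copyPhysReg change above shows, a full 256-bit XR-to-XR move is emitted as xvori.b with a zero immediate, the same idiom the 128-bit path already uses with vori.b, and spills/reloads of LASX256 registers reuse the plain xvst/xvld instructions. A tiny standalone sketch (not patch code) of why OR with zero is a faithful move for every byte lane:

// Sketch: xvori.b $xd, $xj, 0 acts as a register copy because OR-ing each of
// the 32 byte lanes with zero reproduces the source bit pattern exactly.
#include <cstdint>
#include <cassert>

int main() {
  uint8_t lanes[32];
  for (int i = 0; i < 32; ++i)
    lanes[i] = static_cast<uint8_t>(0xA5 ^ i);                  // arbitrary lane contents
  for (int i = 0; i < 32; ++i)
    assert(static_cast<uint8_t>(lanes[i] | 0x00) == lanes[i]);  // each lane preserved
  return 0;
}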
+ if (LoongArch::CFRRegClass.contains(DstReg) && + LoongArch::GPRRegClass.contains(SrcReg)) { +@@ -109,6 +117,8 @@ void LoongArchInstrInfo::storeRegToStackSlot( + Opcode = LoongArch::FST_D; + else if (LoongArch::LSX128RegClass.hasSubClassEq(RC)) + Opcode = LoongArch::VST; ++ else if (LoongArch::LASX256RegClass.hasSubClassEq(RC)) ++ Opcode = LoongArch::XVST; + else if (LoongArch::CFRRegClass.hasSubClassEq(RC)) + Opcode = LoongArch::PseudoST_CFR; + else +@@ -145,6 +155,8 @@ void LoongArchInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + Opcode = LoongArch::FLD_D; + else if (LoongArch::LSX128RegClass.hasSubClassEq(RC)) + Opcode = LoongArch::VLD; ++ else if (LoongArch::LASX256RegClass.hasSubClassEq(RC)) ++ Opcode = LoongArch::XVLD; + else if (LoongArch::CFRRegClass.hasSubClassEq(RC)) + Opcode = LoongArch::PseudoLD_CFR; + else +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index dc37b37b2186..a3afd4789dfc 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -10,6 +10,30 @@ + // + //===----------------------------------------------------------------------===// + ++def lasxsplati8 ++ : PatFrag<(ops node:$e0), ++ (v32i8 (build_vector node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0))>; ++def lasxsplati16 ++ : PatFrag<(ops node:$e0), ++ (v16i16 (build_vector node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0))>; ++def lasxsplati32 ++ : PatFrag<(ops node:$e0), ++ (v8i32 (build_vector node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0))>; ++def lasxsplati64 ++ : PatFrag<(ops node:$e0), ++ (v4i64 (build_vector node:$e0, node:$e0, node:$e0, node:$e0))>; ++ + //===----------------------------------------------------------------------===// + // Instruction class templates + //===----------------------------------------------------------------------===// +@@ -1029,4 +1053,682 @@ def PseudoXVREPLI_D : Pseudo<(outs LASX256:$xd), (ins simm10:$imm), [], + "xvrepli.d", "$xd, $imm">; + } + ++def PseudoXVBNZ_B : VecCond; ++def PseudoXVBNZ_H : VecCond; ++def PseudoXVBNZ_W : VecCond; ++def PseudoXVBNZ_D : VecCond; ++def PseudoXVBNZ : VecCond; ++ ++def PseudoXVBZ_B : VecCond; ++def PseudoXVBZ_H : VecCond; ++def PseudoXVBZ_W : VecCond; ++def PseudoXVBZ_D : VecCond; ++def PseudoXVBZ : VecCond; ++ ++} // Predicates = [HasExtLASX] ++ ++multiclass PatXr { ++ def : Pat<(v32i8 (OpNode (v32i8 LASX256:$xj))), ++ (!cast(Inst#"_B") LASX256:$xj)>; ++ def : Pat<(v16i16 (OpNode (v16i16 LASX256:$xj))), ++ (!cast(Inst#"_H") LASX256:$xj)>; ++ def : Pat<(v8i32 (OpNode (v8i32 LASX256:$xj))), ++ (!cast(Inst#"_W") LASX256:$xj)>; ++ def : Pat<(v4i64 (OpNode (v4i64 LASX256:$xj))), ++ (!cast(Inst#"_D") LASX256:$xj)>; ++} ++ ++multiclass PatXrXr { ++ def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), ++ (!cast(Inst#"_B") LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)), ++ (!cast(Inst#"_H") LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)), ++ (!cast(Inst#"_W") 
LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)), ++ (!cast(Inst#"_D") LASX256:$xj, LASX256:$xk)>; ++} ++ ++multiclass PatXrXrF { ++ def : Pat<(OpNode (v8f32 LASX256:$xj), (v8f32 LASX256:$xk)), ++ (!cast(Inst#"_S") LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(OpNode (v4f64 LASX256:$xj), (v4f64 LASX256:$xk)), ++ (!cast(Inst#"_D") LASX256:$xj, LASX256:$xk)>; ++} ++ ++multiclass PatXrXrU { ++ def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), ++ (!cast(Inst#"_BU") LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)), ++ (!cast(Inst#"_HU") LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)), ++ (!cast(Inst#"_WU") LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)), ++ (!cast(Inst#"_DU") LASX256:$xj, LASX256:$xk)>; ++} ++ ++multiclass PatXrSimm5 { ++ def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 (SplatPat_simm5 simm5:$imm))), ++ (!cast(Inst#"_B") LASX256:$xj, simm5:$imm)>; ++ def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 (SplatPat_simm5 simm5:$imm))), ++ (!cast(Inst#"_H") LASX256:$xj, simm5:$imm)>; ++ def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 (SplatPat_simm5 simm5:$imm))), ++ (!cast(Inst#"_W") LASX256:$xj, simm5:$imm)>; ++ def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 (SplatPat_simm5 simm5:$imm))), ++ (!cast(Inst#"_D") LASX256:$xj, simm5:$imm)>; ++} ++ ++multiclass PatXrUimm5 { ++ def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm5 uimm5:$imm))), ++ (!cast(Inst#"_BU") LASX256:$xj, uimm5:$imm)>; ++ def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 (SplatPat_uimm5 uimm5:$imm))), ++ (!cast(Inst#"_HU") LASX256:$xj, uimm5:$imm)>; ++ def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 (SplatPat_uimm5 uimm5:$imm))), ++ (!cast(Inst#"_WU") LASX256:$xj, uimm5:$imm)>; ++ def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 (SplatPat_uimm5 uimm5:$imm))), ++ (!cast(Inst#"_DU") LASX256:$xj, uimm5:$imm)>; ++} ++ ++multiclass PatXrXrXr { ++ def : Pat<(OpNode (v32i8 LASX256:$xd), (v32i8 LASX256:$xj), ++ (v32i8 LASX256:$xk)), ++ (!cast(Inst#"_B") LASX256:$xd, LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(OpNode (v16i16 LASX256:$xd), (v16i16 LASX256:$xj), ++ (v16i16 LASX256:$xk)), ++ (!cast(Inst#"_H") LASX256:$xd, LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(OpNode (v8i32 LASX256:$xd), (v8i32 LASX256:$xj), ++ (v8i32 LASX256:$xk)), ++ (!cast(Inst#"_W") LASX256:$xd, LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(OpNode (v4i64 LASX256:$xd), (v4i64 LASX256:$xj), ++ (v4i64 LASX256:$xk)), ++ (!cast(Inst#"_D") LASX256:$xd, LASX256:$xj, LASX256:$xk)>; ++} ++ ++multiclass PatShiftXrXr { ++ def : Pat<(OpNode (v32i8 LASX256:$xj), (and vsplati8_imm_eq_7, ++ (v32i8 LASX256:$xk))), ++ (!cast(Inst#"_B") LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(OpNode (v16i16 LASX256:$xj), (and vsplati16_imm_eq_15, ++ (v16i16 LASX256:$xk))), ++ (!cast(Inst#"_H") LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(OpNode (v8i32 LASX256:$xj), (and vsplati32_imm_eq_31, ++ (v8i32 LASX256:$xk))), ++ (!cast(Inst#"_W") LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(OpNode (v4i64 LASX256:$xj), (and vsplati64_imm_eq_63, ++ (v4i64 LASX256:$xk))), ++ (!cast(Inst#"_D") LASX256:$xj, LASX256:$xk)>; ++} ++ ++multiclass PatShiftXrUimm { ++ def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm3 uimm3:$imm))), ++ (!cast(Inst#"_B") LASX256:$xj, uimm3:$imm)>; ++ def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 (SplatPat_uimm4 uimm4:$imm))), ++ (!cast(Inst#"_H") LASX256:$xj, uimm4:$imm)>; ++ def : Pat<(OpNode (v8i32 
LASX256:$xj), (v8i32 (SplatPat_uimm5 uimm5:$imm))), ++ (!cast(Inst#"_W") LASX256:$xj, uimm5:$imm)>; ++ def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 (SplatPat_uimm6 uimm6:$imm))), ++ (!cast(Inst#"_D") LASX256:$xj, uimm6:$imm)>; ++} ++ ++class PatXrXrB ++ : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), ++ (Inst LASX256:$xj, LASX256:$xk)>; ++ ++let Predicates = [HasExtLASX] in { ++ ++// XVADD_{B/H/W/D} ++defm : PatXrXr; ++// XVSUB_{B/H/W/D} ++defm : PatXrXr; ++ ++// XVADDI_{B/H/W/D}U ++defm : PatXrUimm5; ++// XVSUBI_{B/H/W/D}U ++defm : PatXrUimm5; ++ ++// XVNEG_{B/H/W/D} ++def : Pat<(sub immAllZerosV, (v32i8 LASX256:$xj)), (XVNEG_B LASX256:$xj)>; ++def : Pat<(sub immAllZerosV, (v16i16 LASX256:$xj)), (XVNEG_H LASX256:$xj)>; ++def : Pat<(sub immAllZerosV, (v8i32 LASX256:$xj)), (XVNEG_W LASX256:$xj)>; ++def : Pat<(sub immAllZerosV, (v4i64 LASX256:$xj)), (XVNEG_D LASX256:$xj)>; ++ ++// XVMAX[I]_{B/H/W/D}[U] ++defm : PatXrXr; ++defm : PatXrXrU; ++defm : PatXrSimm5; ++defm : PatXrUimm5; ++ ++// XVMIN[I]_{B/H/W/D}[U] ++defm : PatXrXr; ++defm : PatXrXrU; ++defm : PatXrSimm5; ++defm : PatXrUimm5; ++ ++// XVMUL_{B/H/W/D} ++defm : PatXrXr; ++ ++// XVMADD_{B/H/W/D} ++defm : PatXrXrXr; ++// XVMSUB_{B/H/W/D} ++defm : PatXrXrXr; ++ ++// XVDIV_{B/H/W/D}[U] ++defm : PatXrXr; ++defm : PatXrXrU; ++ ++// XVMOD_{B/H/W/D}[U] ++defm : PatXrXr; ++defm : PatXrXrU; ++ ++// XVAND_V ++def : PatXrXrB; ++// XVNOR_V ++def : PatXrXrB; ++// XVXOR_V ++def : PatXrXrB; ++// XVNOR_V ++def : Pat<(vnot (or (v32i8 LASX256:$xj), (v32i8 LASX256:$xk))), ++ (XVNOR_V LASX256:$xj, LASX256:$xk)>; ++ ++// XVANDI_B ++def : Pat<(and (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm8 uimm8:$imm))), ++ (XVANDI_B LASX256:$xj, uimm8:$imm)>; ++// XVORI_B ++def : Pat<(or (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm8 uimm8:$imm))), ++ (XVORI_B LASX256:$xj, uimm8:$imm)>; ++ ++// XVXORI_B ++def : Pat<(xor (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm8 uimm8:$imm))), ++ (XVXORI_B LASX256:$xj, uimm8:$imm)>; ++ ++// XVSLL[I]_{B/H/W/D} ++defm : PatXrXr; ++defm : PatShiftXrXr; ++defm : PatShiftXrUimm; ++ ++// XVSRL[I]_{B/H/W/D} ++defm : PatXrXr; ++defm : PatShiftXrXr; ++defm : PatShiftXrUimm; ++ ++// XVSRA[I]_{B/H/W/D} ++defm : PatXrXr; ++defm : PatShiftXrXr; ++defm : PatShiftXrUimm; ++ ++// XVPCNT_{B/H/W/D} ++defm : PatXr; ++ ++// XVBITCLR_{B/H/W/D} ++def : Pat<(and v32i8:$xj, (vnot (shl vsplat_imm_eq_1, v32i8:$xk))), ++ (v32i8 (XVBITCLR_B v32i8:$xj, v32i8:$xk))>; ++def : Pat<(and v16i16:$xj, (vnot (shl vsplat_imm_eq_1, v16i16:$xk))), ++ (v16i16 (XVBITCLR_H v16i16:$xj, v16i16:$xk))>; ++def : Pat<(and v8i32:$xj, (vnot (shl vsplat_imm_eq_1, v8i32:$xk))), ++ (v8i32 (XVBITCLR_W v8i32:$xj, v8i32:$xk))>; ++def : Pat<(and v4i64:$xj, (vnot (shl vsplat_imm_eq_1, v4i64:$xk))), ++ (v4i64 (XVBITCLR_D v4i64:$xj, v4i64:$xk))>; ++def : Pat<(and v32i8:$xj, (vnot (shl vsplat_imm_eq_1, ++ (vsplati8imm7 v32i8:$xk)))), ++ (v32i8 (XVBITCLR_B v32i8:$xj, v32i8:$xk))>; ++def : Pat<(and v16i16:$xj, (vnot (shl vsplat_imm_eq_1, ++ (vsplati16imm15 v16i16:$xk)))), ++ (v16i16 (XVBITCLR_H v16i16:$xj, v16i16:$xk))>; ++def : Pat<(and v8i32:$xj, (vnot (shl vsplat_imm_eq_1, ++ (vsplati32imm31 v8i32:$xk)))), ++ (v8i32 (XVBITCLR_W v8i32:$xj, v8i32:$xk))>; ++def : Pat<(and v4i64:$xj, (vnot (shl vsplat_imm_eq_1, ++ (vsplati64imm63 v4i64:$xk)))), ++ (v4i64 (XVBITCLR_D v4i64:$xj, v4i64:$xk))>; ++ ++// XVBITCLRI_{B/H/W/D} ++def : Pat<(and (v32i8 LASX256:$xj), (v32i8 (vsplat_uimm_inv_pow2 uimm3:$imm))), ++ (XVBITCLRI_B LASX256:$xj, uimm3:$imm)>; ++def : Pat<(and (v16i16 LASX256:$xj), (v16i16 
(vsplat_uimm_inv_pow2 uimm4:$imm))), ++ (XVBITCLRI_H LASX256:$xj, uimm4:$imm)>; ++def : Pat<(and (v8i32 LASX256:$xj), (v8i32 (vsplat_uimm_inv_pow2 uimm5:$imm))), ++ (XVBITCLRI_W LASX256:$xj, uimm5:$imm)>; ++def : Pat<(and (v4i64 LASX256:$xj), (v4i64 (vsplat_uimm_inv_pow2 uimm6:$imm))), ++ (XVBITCLRI_D LASX256:$xj, uimm6:$imm)>; ++ ++// XVBITSET_{B/H/W/D} ++def : Pat<(or v32i8:$xj, (shl vsplat_imm_eq_1, v32i8:$xk)), ++ (v32i8 (XVBITSET_B v32i8:$xj, v32i8:$xk))>; ++def : Pat<(or v16i16:$xj, (shl vsplat_imm_eq_1, v16i16:$xk)), ++ (v16i16 (XVBITSET_H v16i16:$xj, v16i16:$xk))>; ++def : Pat<(or v8i32:$xj, (shl vsplat_imm_eq_1, v8i32:$xk)), ++ (v8i32 (XVBITSET_W v8i32:$xj, v8i32:$xk))>; ++def : Pat<(or v4i64:$xj, (shl vsplat_imm_eq_1, v4i64:$xk)), ++ (v4i64 (XVBITSET_D v4i64:$xj, v4i64:$xk))>; ++def : Pat<(or v32i8:$xj, (shl vsplat_imm_eq_1, (vsplati8imm7 v32i8:$xk))), ++ (v32i8 (XVBITSET_B v32i8:$xj, v32i8:$xk))>; ++def : Pat<(or v16i16:$xj, (shl vsplat_imm_eq_1, (vsplati16imm15 v16i16:$xk))), ++ (v16i16 (XVBITSET_H v16i16:$xj, v16i16:$xk))>; ++def : Pat<(or v8i32:$xj, (shl vsplat_imm_eq_1, (vsplati32imm31 v8i32:$xk))), ++ (v8i32 (XVBITSET_W v8i32:$xj, v8i32:$xk))>; ++def : Pat<(or v4i64:$xj, (shl vsplat_imm_eq_1, (vsplati64imm63 v4i64:$xk))), ++ (v4i64 (XVBITSET_D v4i64:$xj, v4i64:$xk))>; ++ ++// XVBITSETI_{B/H/W/D} ++def : Pat<(or (v32i8 LASX256:$xj), (v32i8 (vsplat_uimm_pow2 uimm3:$imm))), ++ (XVBITSETI_B LASX256:$xj, uimm3:$imm)>; ++def : Pat<(or (v16i16 LASX256:$xj), (v16i16 (vsplat_uimm_pow2 uimm4:$imm))), ++ (XVBITSETI_H LASX256:$xj, uimm4:$imm)>; ++def : Pat<(or (v8i32 LASX256:$xj), (v8i32 (vsplat_uimm_pow2 uimm5:$imm))), ++ (XVBITSETI_W LASX256:$xj, uimm5:$imm)>; ++def : Pat<(or (v4i64 LASX256:$xj), (v4i64 (vsplat_uimm_pow2 uimm6:$imm))), ++ (XVBITSETI_D LASX256:$xj, uimm6:$imm)>; ++ ++// XVBITREV_{B/H/W/D} ++def : Pat<(xor v32i8:$xj, (shl vsplat_imm_eq_1, v32i8:$xk)), ++ (v32i8 (XVBITREV_B v32i8:$xj, v32i8:$xk))>; ++def : Pat<(xor v16i16:$xj, (shl vsplat_imm_eq_1, v16i16:$xk)), ++ (v16i16 (XVBITREV_H v16i16:$xj, v16i16:$xk))>; ++def : Pat<(xor v8i32:$xj, (shl vsplat_imm_eq_1, v8i32:$xk)), ++ (v8i32 (XVBITREV_W v8i32:$xj, v8i32:$xk))>; ++def : Pat<(xor v4i64:$xj, (shl vsplat_imm_eq_1, v4i64:$xk)), ++ (v4i64 (XVBITREV_D v4i64:$xj, v4i64:$xk))>; ++def : Pat<(xor v32i8:$xj, (shl vsplat_imm_eq_1, (vsplati8imm7 v32i8:$xk))), ++ (v32i8 (XVBITREV_B v32i8:$xj, v32i8:$xk))>; ++def : Pat<(xor v16i16:$xj, (shl vsplat_imm_eq_1, (vsplati16imm15 v16i16:$xk))), ++ (v16i16 (XVBITREV_H v16i16:$xj, v16i16:$xk))>; ++def : Pat<(xor v8i32:$xj, (shl vsplat_imm_eq_1, (vsplati32imm31 v8i32:$xk))), ++ (v8i32 (XVBITREV_W v8i32:$xj, v8i32:$xk))>; ++def : Pat<(xor v4i64:$xj, (shl vsplat_imm_eq_1, (vsplati64imm63 v4i64:$xk))), ++ (v4i64 (XVBITREV_D v4i64:$xj, v4i64:$xk))>; ++ ++// XVBITREVI_{B/H/W/D} ++def : Pat<(xor (v32i8 LASX256:$xj), (v32i8 (vsplat_uimm_pow2 uimm3:$imm))), ++ (XVBITREVI_B LASX256:$xj, uimm3:$imm)>; ++def : Pat<(xor (v16i16 LASX256:$xj), (v16i16 (vsplat_uimm_pow2 uimm4:$imm))), ++ (XVBITREVI_H LASX256:$xj, uimm4:$imm)>; ++def : Pat<(xor (v8i32 LASX256:$xj), (v8i32 (vsplat_uimm_pow2 uimm5:$imm))), ++ (XVBITREVI_W LASX256:$xj, uimm5:$imm)>; ++def : Pat<(xor (v4i64 LASX256:$xj), (v4i64 (vsplat_uimm_pow2 uimm6:$imm))), ++ (XVBITREVI_D LASX256:$xj, uimm6:$imm)>; ++ ++// XVFADD_{S/D} ++defm : PatXrXrF; ++ ++// XVFSUB_{S/D} ++defm : PatXrXrF; ++ ++// XVFMUL_{S/D} ++defm : PatXrXrF; ++ ++// XVFDIV_{S/D} ++defm : PatXrXrF; ++ ++// XVFMADD_{S/D} ++def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa), ++ 
(XVFMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; ++def : Pat<(fma v4f64:$xj, v4f64:$xk, v4f64:$xa), ++ (XVFMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; ++ ++// XVINSGR2VR_{W/D} ++def : Pat<(vector_insert v8i32:$xd, GRLenVT:$rj, uimm3:$imm), ++ (XVINSGR2VR_W v8i32:$xd, GRLenVT:$rj, uimm3:$imm)>; ++def : Pat<(vector_insert v4i64:$xd, GRLenVT:$rj, uimm2:$imm), ++ (XVINSGR2VR_D v4i64:$xd, GRLenVT:$rj, uimm2:$imm)>; ++ ++// XVPICKVE2GR_W[U] ++def : Pat<(loongarch_vpick_sext_elt v8i32:$xd, uimm3:$imm, i32), ++ (XVPICKVE2GR_W v8i32:$xd, uimm3:$imm)>; ++def : Pat<(loongarch_vpick_zext_elt v8i32:$xd, uimm3:$imm, i32), ++ (XVPICKVE2GR_WU v8i32:$xd, uimm3:$imm)>; ++ ++// XVREPLGR2VR_{B/H/W/D} ++def : Pat<(lasxsplati8 GPR:$rj), (XVREPLGR2VR_B GPR:$rj)>; ++def : Pat<(lasxsplati16 GPR:$rj), (XVREPLGR2VR_H GPR:$rj)>; ++def : Pat<(lasxsplati32 GPR:$rj), (XVREPLGR2VR_W GPR:$rj)>; ++def : Pat<(lasxsplati64 GPR:$rj), (XVREPLGR2VR_D GPR:$rj)>; ++ ++// XVREPLVE_{B/H/W/D} ++def : Pat<(loongarch_vreplve v32i8:$xj, GRLenVT:$rk), ++ (XVREPLVE_B v32i8:$xj, GRLenVT:$rk)>; ++def : Pat<(loongarch_vreplve v16i16:$xj, GRLenVT:$rk), ++ (XVREPLVE_H v16i16:$xj, GRLenVT:$rk)>; ++def : Pat<(loongarch_vreplve v8i32:$xj, GRLenVT:$rk), ++ (XVREPLVE_W v8i32:$xj, GRLenVT:$rk)>; ++def : Pat<(loongarch_vreplve v4i64:$xj, GRLenVT:$rk), ++ (XVREPLVE_D v4i64:$xj, GRLenVT:$rk)>; ++ ++// Loads/Stores ++foreach vt = [v32i8, v16i16, v8i32, v4i64] in { ++ defm : LdPat; ++ def : RegRegLdPat; ++ defm : StPat; ++ def : RegRegStPat; ++} ++ ++} // Predicates = [HasExtLASX] ++ ++/// Intrinsic pattern ++ ++class deriveLASXIntrinsic { ++ Intrinsic ret = !cast(!tolower("int_loongarch_lasx_"#Inst)); ++} ++ ++let Predicates = [HasExtLASX] in { ++ ++// vty: v32i8/v16i16/v8i32/v4i64 ++// Pat<(Intrinsic vty:$xj, vty:$xk), ++// (LAInst vty:$xj, vty:$xk)>; ++foreach Inst = ["XVSADD_B", "XVSADD_BU", "XVSSUB_B", "XVSSUB_BU", ++ "XVHADDW_H_B", "XVHADDW_HU_BU", "XVHSUBW_H_B", "XVHSUBW_HU_BU", ++ "XVADDWEV_H_B", "XVADDWOD_H_B", "XVSUBWEV_H_B", "XVSUBWOD_H_B", ++ "XVADDWEV_H_BU", "XVADDWOD_H_BU", "XVSUBWEV_H_BU", "XVSUBWOD_H_BU", ++ "XVADDWEV_H_BU_B", "XVADDWOD_H_BU_B", ++ "XVAVG_B", "XVAVG_BU", "XVAVGR_B", "XVAVGR_BU", ++ "XVABSD_B", "XVABSD_BU", "XVADDA_B", "XVMUH_B", "XVMUH_BU", ++ "XVMULWEV_H_B", "XVMULWOD_H_B", "XVMULWEV_H_BU", "XVMULWOD_H_BU", ++ "XVMULWEV_H_BU_B", "XVMULWOD_H_BU_B", "XVSIGNCOV_B", ++ "XVANDN_V", "XVORN_V", "XVROTR_B", "XVSRLR_B", "XVSRAR_B", ++ "XVSEQ_B", "XVSLE_B", "XVSLE_BU", "XVSLT_B", "XVSLT_BU", ++ "XVPACKEV_B", "XVPACKOD_B", "XVPICKEV_B", "XVPICKOD_B", ++ "XVILVL_B", "XVILVH_B"] in ++ def : Pat<(deriveLASXIntrinsic.ret ++ (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), ++ (!cast(Inst) LASX256:$xj, LASX256:$xk)>; ++foreach Inst = ["XVSADD_H", "XVSADD_HU", "XVSSUB_H", "XVSSUB_HU", ++ "XVHADDW_W_H", "XVHADDW_WU_HU", "XVHSUBW_W_H", "XVHSUBW_WU_HU", ++ "XVADDWEV_W_H", "XVADDWOD_W_H", "XVSUBWEV_W_H", "XVSUBWOD_W_H", ++ "XVADDWEV_W_HU", "XVADDWOD_W_HU", "XVSUBWEV_W_HU", "XVSUBWOD_W_HU", ++ "XVADDWEV_W_HU_H", "XVADDWOD_W_HU_H", ++ "XVAVG_H", "XVAVG_HU", "XVAVGR_H", "XVAVGR_HU", ++ "XVABSD_H", "XVABSD_HU", "XVADDA_H", "XVMUH_H", "XVMUH_HU", ++ "XVMULWEV_W_H", "XVMULWOD_W_H", "XVMULWEV_W_HU", "XVMULWOD_W_HU", ++ "XVMULWEV_W_HU_H", "XVMULWOD_W_HU_H", "XVSIGNCOV_H", "XVROTR_H", ++ "XVSRLR_H", "XVSRAR_H", "XVSRLN_B_H", "XVSRAN_B_H", "XVSRLRN_B_H", ++ "XVSRARN_B_H", "XVSSRLN_B_H", "XVSSRAN_B_H", "XVSSRLN_BU_H", ++ "XVSSRAN_BU_H", "XVSSRLRN_B_H", "XVSSRARN_B_H", "XVSSRLRN_BU_H", ++ "XVSSRARN_BU_H", ++ "XVSEQ_H", "XVSLE_H", "XVSLE_HU", "XVSLT_H", "XVSLT_HU", 
++ "XVPACKEV_H", "XVPACKOD_H", "XVPICKEV_H", "XVPICKOD_H", ++ "XVILVL_H", "XVILVH_H"] in ++ def : Pat<(deriveLASXIntrinsic.ret ++ (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)), ++ (!cast(Inst) LASX256:$xj, LASX256:$xk)>; ++foreach Inst = ["XVSADD_W", "XVSADD_WU", "XVSSUB_W", "XVSSUB_WU", ++ "XVHADDW_D_W", "XVHADDW_DU_WU", "XVHSUBW_D_W", "XVHSUBW_DU_WU", ++ "XVADDWEV_D_W", "XVADDWOD_D_W", "XVSUBWEV_D_W", "XVSUBWOD_D_W", ++ "XVADDWEV_D_WU", "XVADDWOD_D_WU", "XVSUBWEV_D_WU", "XVSUBWOD_D_WU", ++ "XVADDWEV_D_WU_W", "XVADDWOD_D_WU_W", ++ "XVAVG_W", "XVAVG_WU", "XVAVGR_W", "XVAVGR_WU", ++ "XVABSD_W", "XVABSD_WU", "XVADDA_W", "XVMUH_W", "XVMUH_WU", ++ "XVMULWEV_D_W", "XVMULWOD_D_W", "XVMULWEV_D_WU", "XVMULWOD_D_WU", ++ "XVMULWEV_D_WU_W", "XVMULWOD_D_WU_W", "XVSIGNCOV_W", "XVROTR_W", ++ "XVSRLR_W", "XVSRAR_W", "XVSRLN_H_W", "XVSRAN_H_W", "XVSRLRN_H_W", ++ "XVSRARN_H_W", "XVSSRLN_H_W", "XVSSRAN_H_W", "XVSSRLN_HU_W", ++ "XVSSRAN_HU_W", "XVSSRLRN_H_W", "XVSSRARN_H_W", "XVSSRLRN_HU_W", ++ "XVSSRARN_HU_W", ++ "XVSEQ_W", "XVSLE_W", "XVSLE_WU", "XVSLT_W", "XVSLT_WU", ++ "XVPACKEV_W", "XVPACKOD_W", "XVPICKEV_W", "XVPICKOD_W", ++ "XVILVL_W", "XVILVH_W", "XVPERM_W"] in ++ def : Pat<(deriveLASXIntrinsic.ret ++ (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)), ++ (!cast(Inst) LASX256:$xj, LASX256:$xk)>; ++foreach Inst = ["XVADD_Q", "XVSUB_Q", ++ "XVSADD_D", "XVSADD_DU", "XVSSUB_D", "XVSSUB_DU", ++ "XVHADDW_Q_D", "XVHADDW_QU_DU", "XVHSUBW_Q_D", "XVHSUBW_QU_DU", ++ "XVADDWEV_Q_D", "XVADDWOD_Q_D", "XVSUBWEV_Q_D", "XVSUBWOD_Q_D", ++ "XVADDWEV_Q_DU", "XVADDWOD_Q_DU", "XVSUBWEV_Q_DU", "XVSUBWOD_Q_DU", ++ "XVADDWEV_Q_DU_D", "XVADDWOD_Q_DU_D", ++ "XVAVG_D", "XVAVG_DU", "XVAVGR_D", "XVAVGR_DU", ++ "XVABSD_D", "XVABSD_DU", "XVADDA_D", "XVMUH_D", "XVMUH_DU", ++ "XVMULWEV_Q_D", "XVMULWOD_Q_D", "XVMULWEV_Q_DU", "XVMULWOD_Q_DU", ++ "XVMULWEV_Q_DU_D", "XVMULWOD_Q_DU_D", "XVSIGNCOV_D", "XVROTR_D", ++ "XVSRLR_D", "XVSRAR_D", "XVSRLN_W_D", "XVSRAN_W_D", "XVSRLRN_W_D", ++ "XVSRARN_W_D", "XVSSRLN_W_D", "XVSSRAN_W_D", "XVSSRLN_WU_D", ++ "XVSSRAN_WU_D", "XVSSRLRN_W_D", "XVSSRARN_W_D", "XVSSRLRN_WU_D", ++ "XVSSRARN_WU_D", "XVFFINT_S_L", ++ "XVSEQ_D", "XVSLE_D", "XVSLE_DU", "XVSLT_D", "XVSLT_DU", ++ "XVPACKEV_D", "XVPACKOD_D", "XVPICKEV_D", "XVPICKOD_D", ++ "XVILVL_D", "XVILVH_D"] in ++ def : Pat<(deriveLASXIntrinsic.ret ++ (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)), ++ (!cast(Inst) LASX256:$xj, LASX256:$xk)>; ++ ++// vty: v32i8/v16i16/v8i32/v4i64 ++// Pat<(Intrinsic vty:$xd, vty:$xj, vty:$xk), ++// (LAInst vty:$xd, vty:$xj, vty:$xk)>; ++foreach Inst = ["XVMADDWEV_H_B", "XVMADDWOD_H_B", "XVMADDWEV_H_BU", ++ "XVMADDWOD_H_BU", "XVMADDWEV_H_BU_B", "XVMADDWOD_H_BU_B"] in ++ def : Pat<(deriveLASXIntrinsic.ret ++ (v16i16 LASX256:$xd), (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), ++ (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; ++foreach Inst = ["XVMADDWEV_W_H", "XVMADDWOD_W_H", "XVMADDWEV_W_HU", ++ "XVMADDWOD_W_HU", "XVMADDWEV_W_HU_H", "XVMADDWOD_W_HU_H"] in ++ def : Pat<(deriveLASXIntrinsic.ret ++ (v8i32 LASX256:$xd), (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)), ++ (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; ++foreach Inst = ["XVMADDWEV_D_W", "XVMADDWOD_D_W", "XVMADDWEV_D_WU", ++ "XVMADDWOD_D_WU", "XVMADDWEV_D_WU_W", "XVMADDWOD_D_WU_W"] in ++ def : Pat<(deriveLASXIntrinsic.ret ++ (v4i64 LASX256:$xd), (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)), ++ (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; ++foreach Inst = ["XVMADDWEV_Q_D", "XVMADDWOD_Q_D", "XVMADDWEV_Q_DU", ++ "XVMADDWOD_Q_DU", "XVMADDWEV_Q_DU_D", "XVMADDWOD_Q_DU_D"] in ++ 
def : Pat<(deriveLASXIntrinsic.ret ++ (v4i64 LASX256:$xd), (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)), ++ (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; ++ ++// vty: v32i8/v16i16/v8i32/v4i64 ++// Pat<(Intrinsic vty:$xj), ++// (LAInst vty:$xj)>; ++foreach Inst = ["XVEXTH_H_B", "XVEXTH_HU_BU", ++ "XVMSKLTZ_B", "XVMSKGEZ_B", "XVMSKNZ_B", ++ "XVCLO_B", "XVCLZ_B", "VEXT2XV_H_B", "VEXT2XV_HU_BU", ++ "VEXT2XV_W_B", "VEXT2XV_WU_BU", "VEXT2XV_D_B", ++ "VEXT2XV_DU_BU", "XVREPLVE0_B", "XVREPLVE0_Q"] in ++ def : Pat<(deriveLASXIntrinsic.ret (v32i8 LASX256:$xj)), ++ (!cast(Inst) LASX256:$xj)>; ++foreach Inst = ["XVEXTH_W_H", "XVEXTH_WU_HU", "XVMSKLTZ_H", ++ "XVCLO_H", "XVCLZ_H", "XVFCVTL_S_H", "XVFCVTH_S_H", ++ "VEXT2XV_W_H", "VEXT2XV_WU_HU", "VEXT2XV_D_H", ++ "VEXT2XV_DU_HU", "XVREPLVE0_H"] in ++ def : Pat<(deriveLASXIntrinsic.ret (v16i16 LASX256:$xj)), ++ (!cast(Inst) LASX256:$xj)>; ++foreach Inst = ["XVEXTH_D_W", "XVEXTH_DU_WU", "XVMSKLTZ_W", ++ "XVCLO_W", "XVCLZ_W", "XVFFINT_S_W", "XVFFINT_S_WU", ++ "XVFFINTL_D_W", "XVFFINTH_D_W", ++ "VEXT2XV_D_W", "VEXT2XV_DU_WU", "XVREPLVE0_W"] in ++ def : Pat<(deriveLASXIntrinsic.ret (v8i32 LASX256:$xj)), ++ (!cast(Inst) LASX256:$xj)>; ++foreach Inst = ["XVEXTH_Q_D", "XVEXTH_QU_DU", "XVMSKLTZ_D", ++ "XVEXTL_Q_D", "XVEXTL_QU_DU", ++ "XVCLO_D", "XVCLZ_D", "XVFFINT_D_L", "XVFFINT_D_LU", ++ "XVREPLVE0_D"] in ++ def : Pat<(deriveLASXIntrinsic.ret (v4i64 LASX256:$xj)), ++ (!cast(Inst) LASX256:$xj)>; ++ ++// Pat<(Intrinsic timm:$imm) ++// (LAInst timm:$imm)>; ++def : Pat<(int_loongarch_lasx_xvldi timm:$imm), ++ (XVLDI (to_valide_timm timm:$imm))>; ++foreach Inst = ["XVREPLI_B", "XVREPLI_H", "XVREPLI_W", "XVREPLI_D"] in ++ def : Pat<(deriveLASXIntrinsic.ret timm:$imm), ++ (!cast("Pseudo"#Inst) (to_valide_timm timm:$imm))>; ++ ++// vty: v32i8/v16i16/v8i32/v4i64 ++// Pat<(Intrinsic vty:$xj, timm:$imm) ++// (LAInst vty:$xj, timm:$imm)>; ++foreach Inst = ["XVSAT_B", "XVSAT_BU", "XVNORI_B", "XVROTRI_B", "XVSLLWIL_H_B", ++ "XVSLLWIL_HU_BU", "XVSRLRI_B", "XVSRARI_B", ++ "XVSEQI_B", "XVSLEI_B", "XVSLEI_BU", "XVSLTI_B", "XVSLTI_BU", ++ "XVREPL128VEI_B", "XVBSLL_V", "XVBSRL_V", "XVSHUF4I_B"] in ++ def : Pat<(deriveLASXIntrinsic.ret (v32i8 LASX256:$xj), timm:$imm), ++ (!cast(Inst) LASX256:$xj, (to_valide_timm timm:$imm))>; ++foreach Inst = ["XVSAT_H", "XVSAT_HU", "XVROTRI_H", "XVSLLWIL_W_H", ++ "XVSLLWIL_WU_HU", "XVSRLRI_H", "XVSRARI_H", ++ "XVSEQI_H", "XVSLEI_H", "XVSLEI_HU", "XVSLTI_H", "XVSLTI_HU", ++ "XVREPL128VEI_H", "XVSHUF4I_H"] in ++ def : Pat<(deriveLASXIntrinsic.ret (v16i16 LASX256:$xj), timm:$imm), ++ (!cast(Inst) LASX256:$xj, (to_valide_timm timm:$imm))>; ++foreach Inst = ["XVSAT_W", "XVSAT_WU", "XVROTRI_W", "XVSLLWIL_D_W", ++ "XVSLLWIL_DU_WU", "XVSRLRI_W", "XVSRARI_W", ++ "XVSEQI_W", "XVSLEI_W", "XVSLEI_WU", "XVSLTI_W", "XVSLTI_WU", ++ "XVREPL128VEI_W", "XVSHUF4I_W", "XVPICKVE_W"] in ++ def : Pat<(deriveLASXIntrinsic.ret (v8i32 LASX256:$xj), timm:$imm), ++ (!cast(Inst) LASX256:$xj, (to_valide_timm timm:$imm))>; ++foreach Inst = ["XVSAT_D", "XVSAT_DU", "XVROTRI_D", "XVSRLRI_D", "XVSRARI_D", ++ "XVSEQI_D", "XVSLEI_D", "XVSLEI_DU", "XVSLTI_D", "XVSLTI_DU", ++ "XVPICKVE2GR_D", "XVPICKVE2GR_DU", ++ "XVREPL128VEI_D", "XVPERMI_D", "XVPICKVE_D"] in ++ def : Pat<(deriveLASXIntrinsic.ret (v4i64 LASX256:$xj), timm:$imm), ++ (!cast(Inst) LASX256:$xj, (to_valide_timm timm:$imm))>; ++ ++// vty: v32i8/v16i16/v8i32/v4i64 ++// Pat<(Intrinsic vty:$xd, vty:$xj, timm:$imm) ++// (LAInst vty:$xd, vty:$xj, timm:$imm)>; ++foreach Inst = ["XVSRLNI_B_H", "XVSRANI_B_H", "XVSRLRNI_B_H", 
"XVSRARNI_B_H", ++ "XVSSRLNI_B_H", "XVSSRANI_B_H", "XVSSRLNI_BU_H", "XVSSRANI_BU_H", ++ "XVSSRLRNI_B_H", "XVSSRARNI_B_H", "XVSSRLRNI_BU_H", "XVSSRARNI_BU_H", ++ "XVFRSTPI_B", "XVBITSELI_B", "XVEXTRINS_B", "XVPERMI_Q"] in ++ def : Pat<(deriveLASXIntrinsic.ret ++ (v32i8 LASX256:$xd), (v32i8 LASX256:$xj), timm:$imm), ++ (!cast(Inst) LASX256:$xd, LASX256:$xj, ++ (to_valide_timm timm:$imm))>; ++foreach Inst = ["XVSRLNI_H_W", "XVSRANI_H_W", "XVSRLRNI_H_W", "XVSRARNI_H_W", ++ "XVSSRLNI_H_W", "XVSSRANI_H_W", "XVSSRLNI_HU_W", "XVSSRANI_HU_W", ++ "XVSSRLRNI_H_W", "XVSSRARNI_H_W", "XVSSRLRNI_HU_W", "XVSSRARNI_HU_W", ++ "XVFRSTPI_H", "XVEXTRINS_H"] in ++ def : Pat<(deriveLASXIntrinsic.ret ++ (v16i16 LASX256:$xd), (v16i16 LASX256:$xj), timm:$imm), ++ (!cast(Inst) LASX256:$xd, LASX256:$xj, ++ (to_valide_timm timm:$imm))>; ++foreach Inst = ["XVSRLNI_W_D", "XVSRANI_W_D", "XVSRLRNI_W_D", "XVSRARNI_W_D", ++ "XVSSRLNI_W_D", "XVSSRANI_W_D", "XVSSRLNI_WU_D", "XVSSRANI_WU_D", ++ "XVSSRLRNI_W_D", "XVSSRARNI_W_D", "XVSSRLRNI_WU_D", "XVSSRARNI_WU_D", ++ "XVPERMI_W", "XVEXTRINS_W", "XVINSVE0_W"] in ++ def : Pat<(deriveLASXIntrinsic.ret ++ (v8i32 LASX256:$xd), (v8i32 LASX256:$xj), timm:$imm), ++ (!cast(Inst) LASX256:$xd, LASX256:$xj, ++ (to_valide_timm timm:$imm))>; ++foreach Inst = ["XVSRLNI_D_Q", "XVSRANI_D_Q", "XVSRLRNI_D_Q", "XVSRARNI_D_Q", ++ "XVSSRLNI_D_Q", "XVSSRANI_D_Q", "XVSSRLNI_DU_Q", "XVSSRANI_DU_Q", ++ "XVSSRLRNI_D_Q", "XVSSRARNI_D_Q", "XVSSRLRNI_DU_Q", "XVSSRARNI_DU_Q", ++ "XVSHUF4I_D", "XVEXTRINS_D", "XVINSVE0_D"] in ++ def : Pat<(deriveLASXIntrinsic.ret ++ (v4i64 LASX256:$xd), (v4i64 LASX256:$xj), timm:$imm), ++ (!cast(Inst) LASX256:$xd, LASX256:$xj, ++ (to_valide_timm timm:$imm))>; ++ ++// vty: v32i8/v16i16/v8i32/v4i64 ++// Pat<(Intrinsic vty:$xd, vty:$xj, vty:$xk), ++// (LAInst vty:$xd, vty:$xj, vty:$xk)>; ++foreach Inst = ["XVFRSTP_B", "XVBITSEL_V", "XVSHUF_B"] in ++ def : Pat<(deriveLASXIntrinsic.ret ++ (v32i8 LASX256:$xd), (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), ++ (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; ++foreach Inst = ["XVFRSTP_H", "XVSHUF_H"] in ++ def : Pat<(deriveLASXIntrinsic.ret ++ (v16i16 LASX256:$xd), (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)), ++ (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; ++def : Pat<(int_loongarch_lasx_xvshuf_w (v8i32 LASX256:$xd), (v8i32 LASX256:$xj), ++ (v8i32 LASX256:$xk)), ++ (XVSHUF_W LASX256:$xd, LASX256:$xj, LASX256:$xk)>; ++def : Pat<(int_loongarch_lasx_xvshuf_d (v4i64 LASX256:$xd), (v4i64 LASX256:$xj), ++ (v4i64 LASX256:$xk)), ++ (XVSHUF_D LASX256:$xd, LASX256:$xj, LASX256:$xk)>; ++ ++// vty: v8f32/v4f64 ++// Pat<(Intrinsic vty:$xj, vty:$xk, vty:$xa), ++// (LAInst vty:$xj, vty:$xk, vty:$xa)>; ++foreach Inst = ["XVFMSUB_S", "XVFNMADD_S", "XVFNMSUB_S"] in ++ def : Pat<(deriveLASXIntrinsic.ret ++ (v8f32 LASX256:$xj), (v8f32 LASX256:$xk), (v8f32 LASX256:$xa)), ++ (!cast(Inst) LASX256:$xj, LASX256:$xk, LASX256:$xa)>; ++foreach Inst = ["XVFMSUB_D", "XVFNMADD_D", "XVFNMSUB_D"] in ++ def : Pat<(deriveLASXIntrinsic.ret ++ (v4f64 LASX256:$xj), (v4f64 LASX256:$xk), (v4f64 LASX256:$xa)), ++ (!cast(Inst) LASX256:$xj, LASX256:$xk, LASX256:$xa)>; ++ ++// vty: v8f32/v4f64 ++// Pat<(Intrinsic vty:$xj, vty:$xk), ++// (LAInst vty:$xj, vty:$xk)>; ++foreach Inst = ["XVFMAX_S", "XVFMIN_S", "XVFMAXA_S", "XVFMINA_S", "XVFCVT_H_S", ++ "XVFCMP_CAF_S", "XVFCMP_CUN_S", "XVFCMP_CEQ_S", "XVFCMP_CUEQ_S", ++ "XVFCMP_CLT_S", "XVFCMP_CULT_S", "XVFCMP_CLE_S", "XVFCMP_CULE_S", ++ "XVFCMP_CNE_S", "XVFCMP_COR_S", "XVFCMP_CUNE_S", ++ "XVFCMP_SAF_S", "XVFCMP_SUN_S", 
"XVFCMP_SEQ_S", "XVFCMP_SUEQ_S", ++ "XVFCMP_SLT_S", "XVFCMP_SULT_S", "XVFCMP_SLE_S", "XVFCMP_SULE_S", ++ "XVFCMP_SNE_S", "XVFCMP_SOR_S", "XVFCMP_SUNE_S"] in ++ def : Pat<(deriveLASXIntrinsic.ret ++ (v8f32 LASX256:$xj), (v8f32 LASX256:$xk)), ++ (!cast(Inst) LASX256:$xj, LASX256:$xk)>; ++foreach Inst = ["XVFMAX_D", "XVFMIN_D", "XVFMAXA_D", "XVFMINA_D", "XVFCVT_S_D", ++ "XVFTINTRNE_W_D", "XVFTINTRZ_W_D", "XVFTINTRP_W_D", "XVFTINTRM_W_D", ++ "XVFTINT_W_D", ++ "XVFCMP_CAF_D", "XVFCMP_CUN_D", "XVFCMP_CEQ_D", "XVFCMP_CUEQ_D", ++ "XVFCMP_CLT_D", "XVFCMP_CULT_D", "XVFCMP_CLE_D", "XVFCMP_CULE_D", ++ "XVFCMP_CNE_D", "XVFCMP_COR_D", "XVFCMP_CUNE_D", ++ "XVFCMP_SAF_D", "XVFCMP_SUN_D", "XVFCMP_SEQ_D", "XVFCMP_SUEQ_D", ++ "XVFCMP_SLT_D", "XVFCMP_SULT_D", "XVFCMP_SLE_D", "XVFCMP_SULE_D", ++ "XVFCMP_SNE_D", "XVFCMP_SOR_D", "XVFCMP_SUNE_D"] in ++ def : Pat<(deriveLASXIntrinsic.ret ++ (v4f64 LASX256:$xj), (v4f64 LASX256:$xk)), ++ (!cast(Inst) LASX256:$xj, LASX256:$xk)>; ++ ++// vty: v8f32/v4f64 ++// Pat<(Intrinsic vty:$xj), ++// (LAInst vty:$xj)>; ++foreach Inst = ["XVFLOGB_S", "XVFCLASS_S", "XVFSQRT_S", "XVFRECIP_S", "XVFRSQRT_S", ++ "XVFRINT_S", "XVFCVTL_D_S", "XVFCVTH_D_S", ++ "XVFRINTRNE_S", "XVFRINTRZ_S", "XVFRINTRP_S", "XVFRINTRM_S", ++ "XVFTINTRNE_W_S", "XVFTINTRZ_W_S", "XVFTINTRP_W_S", "XVFTINTRM_W_S", ++ "XVFTINT_W_S", "XVFTINTRZ_WU_S", "XVFTINT_WU_S", ++ "XVFTINTRNEL_L_S", "XVFTINTRNEH_L_S", "XVFTINTRZL_L_S", ++ "XVFTINTRZH_L_S", "XVFTINTRPL_L_S", "XVFTINTRPH_L_S", ++ "XVFTINTRML_L_S", "XVFTINTRMH_L_S", "XVFTINTL_L_S", ++ "XVFTINTH_L_S"] in ++ def : Pat<(deriveLASXIntrinsic.ret (v8f32 LASX256:$xj)), ++ (!cast(Inst) LASX256:$xj)>; ++foreach Inst = ["XVFLOGB_D", "XVFCLASS_D", "XVFSQRT_D", "XVFRECIP_D", "XVFRSQRT_D", ++ "XVFRINT_D", ++ "XVFRINTRNE_D", "XVFRINTRZ_D", "XVFRINTRP_D", "XVFRINTRM_D", ++ "XVFTINTRNE_L_D", "XVFTINTRZ_L_D", "XVFTINTRP_L_D", "XVFTINTRM_L_D", ++ "XVFTINT_L_D", "XVFTINTRZ_LU_D", "XVFTINT_LU_D"] in ++ def : Pat<(deriveLASXIntrinsic.ret (v4f64 LASX256:$xj)), ++ (!cast(Inst) LASX256:$xj)>; ++ ++def : Pat<(int_loongarch_lasx_xvpickve_w_f v8f32:$xj, timm:$imm), ++ (XVPICKVE_W v8f32:$xj, (to_valide_timm timm:$imm))>; ++def : Pat<(int_loongarch_lasx_xvpickve_d_f v4f64:$xj, timm:$imm), ++ (XVPICKVE_D v4f64:$xj, (to_valide_timm timm:$imm))>; ++ ++// load ++def : Pat<(int_loongarch_lasx_xvld GPR:$rj, timm:$imm), ++ (XVLD GPR:$rj, (to_valide_timm timm:$imm))>; ++def : Pat<(int_loongarch_lasx_xvldx GPR:$rj, GPR:$rk), ++ (XVLDX GPR:$rj, GPR:$rk)>; ++ ++def : Pat<(int_loongarch_lasx_xvldrepl_b GPR:$rj, timm:$imm), ++ (XVLDREPL_B GPR:$rj, (to_valide_timm timm:$imm))>; ++def : Pat<(int_loongarch_lasx_xvldrepl_h GPR:$rj, timm:$imm), ++ (XVLDREPL_H GPR:$rj, (to_valide_timm timm:$imm))>; ++def : Pat<(int_loongarch_lasx_xvldrepl_w GPR:$rj, timm:$imm), ++ (XVLDREPL_W GPR:$rj, (to_valide_timm timm:$imm))>; ++def : Pat<(int_loongarch_lasx_xvldrepl_d GPR:$rj, timm:$imm), ++ (XVLDREPL_D GPR:$rj, (to_valide_timm timm:$imm))>; ++ ++// store ++def : Pat<(int_loongarch_lasx_xvst LASX256:$xd, GPR:$rj, timm:$imm), ++ (XVST LASX256:$xd, GPR:$rj, (to_valide_timm timm:$imm))>; ++def : Pat<(int_loongarch_lasx_xvstx LASX256:$xd, GPR:$rj, GPR:$rk), ++ (XVSTX LASX256:$xd, GPR:$rj, GPR:$rk)>; ++ ++def : Pat<(int_loongarch_lasx_xvstelm_b v32i8:$xd, GPR:$rj, timm:$imm, timm:$idx), ++ (XVSTELM_B v32i8:$xd, GPR:$rj, (to_valide_timm timm:$imm), ++ (to_valide_timm timm:$idx))>; ++def : Pat<(int_loongarch_lasx_xvstelm_h v16i16:$xd, GPR:$rj, timm:$imm, timm:$idx), ++ (XVSTELM_H v16i16:$xd, GPR:$rj, (to_valide_timm 
timm:$imm), ++ (to_valide_timm timm:$idx))>; ++def : Pat<(int_loongarch_lasx_xvstelm_w v8i32:$xd, GPR:$rj, timm:$imm, timm:$idx), ++ (XVSTELM_W v8i32:$xd, GPR:$rj, (to_valide_timm timm:$imm), ++ (to_valide_timm timm:$idx))>; ++def : Pat<(int_loongarch_lasx_xvstelm_d v4i64:$xd, GPR:$rj, timm:$imm, timm:$idx), ++ (XVSTELM_D v4i64:$xd, GPR:$rj, (to_valide_timm timm:$imm), ++ (to_valide_timm timm:$idx))>; ++ + } // Predicates = [HasExtLASX] +-- +2.20.1 + diff --git a/0002-Backport-LoongArch-Allow-delayed-decision-for-ADD-SUB-relocations.patch b/0003-LoongArch-Allow-delayed-decision-for-ADD-SUB-relocat.patch similarity index 99% rename from 0002-Backport-LoongArch-Allow-delayed-decision-for-ADD-SUB-relocations.patch rename to 0003-LoongArch-Allow-delayed-decision-for-ADD-SUB-relocat.patch index 496e268..af01a11 100644 --- a/0002-Backport-LoongArch-Allow-delayed-decision-for-ADD-SUB-relocations.patch +++ b/0003-LoongArch-Allow-delayed-decision-for-ADD-SUB-relocat.patch @@ -17,7 +17,6 @@ the same effect when relaxation is enabled. [1] https://reviews.llvm.org/D155357 (cherry picked from commit a8081ed8ff0fd11fb8d5f4c83df49da909e49612) -Change-Id: Ic4c6a3eb11b576cb0c6ed0eba02150ad67c33cf2 --- llvm/lib/MC/MCExpr.cpp | 3 +- .../MCTargetDesc/LoongArchAsmBackend.cpp | 78 +++++++++++++++++++ diff --git a/0003-LoongArch-Support-finer-grained-DBAR-hints-for-LA664.patch b/0003-LoongArch-Support-finer-grained-DBAR-hints-for-LA664.patch new file mode 100644 index 0000000..717374a --- /dev/null +++ b/0003-LoongArch-Support-finer-grained-DBAR-hints-for-LA664.patch @@ -0,0 +1,839 @@ +From 90a416b90dca40241fcf6429bd14b88e40128038 Mon Sep 17 00:00:00 2001 +From: WANG Xuerui +Date: Wed, 11 Oct 2023 10:39:13 +0800 +Subject: [PATCH 3/7] [LoongArch] Support finer-grained DBAR hints for LA664+ + (#68787) + +These are treated as DBAR 0 on older uarchs, so we can start to +unconditionally emit the new hints right away. 
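+For example (see the updated tests below), fences now select the finer hints
+instead of the full barrier:
+
+  fence acquire            ->  dbar 20  (0b10100), previously dbar 0
+  fence release            ->  dbar 18  (0b10010), previously dbar 0
+  fence acq_rel / seq_cst  ->  dbar 16  (0b10000), previously dbar 0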
+ +Co-authored-by: WANG Rui +(cherry picked from commit 956482de13107b640cffedd08610fcccd98f708f) + +--- + .../LoongArchExpandAtomicPseudoInsts.cpp | 4 +- + .../LoongArch/LoongArchISelLowering.cpp | 20 +++++++ + .../Target/LoongArch/LoongArchISelLowering.h | 1 + + .../Target/LoongArch/LoongArchInstrInfo.td | 24 +++++++- + .../LoongArch/atomicrmw-uinc-udec-wrap.ll | 16 ++--- + .../ir-instruction/atomic-cmpxchg.ll | 24 ++++---- + .../LoongArch/ir-instruction/atomicrmw-fp.ll | 48 +++++++-------- + .../ir-instruction/fence-singlethread.ll | 4 +- + .../CodeGen/LoongArch/ir-instruction/fence.ll | 16 ++--- + .../ir-instruction/load-store-atomic.ll | 58 +++++++++---------- + 10 files changed, 129 insertions(+), 86 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp +index eb78ef065b21..b348cb56c136 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp +@@ -579,8 +579,8 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg( + case AtomicOrdering::Acquire: + case AtomicOrdering::AcquireRelease: + case AtomicOrdering::SequentiallyConsistent: +- // TODO: acquire +- hint = 0; ++ // acquire ++ hint = 0b10100; + break; + default: + hint = 0x700; +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index 5affaf37ad5a..33a3197013cc 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -159,6 +159,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + // The MULO libcall is not part of libgcc, only compiler-rt. + setLibcallName(RTLIB::MULO_I128, nullptr); + ++ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); ++ + static const ISD::CondCode FPCCToExpand[] = { + ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE, + ISD::SETGE, ISD::SETNE, ISD::SETGT}; +@@ -366,6 +368,8 @@ bool LoongArchTargetLowering::isOffsetFoldingLegal( + SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { + switch (Op.getOpcode()) { ++ case ISD::ATOMIC_FENCE: ++ return lowerATOMIC_FENCE(Op, DAG); + case ISD::EH_DWARF_CFA: + return lowerEH_DWARF_CFA(Op, DAG); + case ISD::GlobalAddress: +@@ -542,6 +546,22 @@ LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, + return SDValue(); + } + ++SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op, ++ SelectionDAG &DAG) const { ++ SDLoc DL(Op); ++ SyncScope::ID FenceSSID = ++ static_cast(Op.getConstantOperandVal(2)); ++ ++ // singlethread fences only synchronize with signal handlers on the same ++ // thread and thus only need to preserve instruction order, not actually ++ // enforce memory ordering. ++ if (FenceSSID == SyncScope::SingleThread) ++ // MEMBARRIER is a compiler barrier; it codegens to a no-op. 
++ return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); ++ ++ return Op; ++} ++ + SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op, + SelectionDAG &DAG) const { + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +index 6b5a851ec55d..23b90640a690 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +@@ -266,6 +266,7 @@ private: + MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr &MI, + MachineBasicBlock *BB) const override; ++ SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +index a9b0db30c2f6..fcbd314507a5 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +@@ -1590,7 +1590,29 @@ def : RegRegStPat; + + /// Atomic loads and stores + +-def : Pat<(atomic_fence timm, timm), (DBAR 0)>; ++// DBAR hint encoding for LA664 and later micro-architectures, paraphrased from ++// the Linux patch revealing it [1]: ++// ++// - Bit 4: kind of constraint (0: completion, 1: ordering) ++// - Bit 3: barrier for previous read (0: true, 1: false) ++// - Bit 2: barrier for previous write (0: true, 1: false) ++// - Bit 1: barrier for succeeding read (0: true, 1: false) ++// - Bit 0: barrier for succeeding write (0: true, 1: false) ++// ++// Hint 0x700: barrier for "read after read" from the same address, which is ++// e.g. needed by LL-SC loops on older models. (DBAR 0x700 behaves the same as ++// nop if such reordering is disabled on supporting newer models.) ++// ++// [1]: https://lore.kernel.org/loongarch/20230516124536.535343-1-chenhuacai@loongson.cn/ ++// ++// Implementations without support for the finer-granularity hints simply treat ++// all as the full barrier (DBAR 0), so we can unconditionally start emiting the ++// more precise hints right away. 
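++// As a worked example of the encoding above: the "acquire" hint 0b10100 (20)
++// sets bit 4 (ordering constraint) and bit 2 (no barrier needed for preceding
++// writes), while keeping the barriers for preceding reads and for all
++// succeeding reads and writes.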
++ ++def : Pat<(atomic_fence 4, timm), (DBAR 0b10100)>; // acquire ++def : Pat<(atomic_fence 5, timm), (DBAR 0b10010)>; // release ++def : Pat<(atomic_fence 6, timm), (DBAR 0b10000)>; // acqrel ++def : Pat<(atomic_fence 7, timm), (DBAR 0b10000)>; // seqcst + + defm : LdPat; + defm : LdPat; +diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll +index 32106886c783..d8908acbc945 100644 +--- a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll ++++ b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll +@@ -40,7 +40,7 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { + ; LA64-NEXT: b .LBB0_6 + ; LA64-NEXT: .LBB0_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB0_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1 + ; LA64-NEXT: addi.w $a6, $a3, 0 +@@ -93,7 +93,7 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { + ; LA64-NEXT: b .LBB1_6 + ; LA64-NEXT: .LBB1_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB1_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1 + ; LA64-NEXT: addi.w $a6, $a3, 0 +@@ -133,7 +133,7 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { + ; LA64-NEXT: b .LBB2_6 + ; LA64-NEXT: .LBB2_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB2_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1 + ; LA64-NEXT: move $a3, $a1 +@@ -171,7 +171,7 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { + ; LA64-NEXT: b .LBB3_6 + ; LA64-NEXT: .LBB3_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB3_1 Depth=1 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB3_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB3_1 Depth=1 + ; LA64-NEXT: bne $a2, $a3, .LBB3_1 +@@ -226,7 +226,7 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { + ; LA64-NEXT: b .LBB4_6 + ; LA64-NEXT: .LBB4_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB4_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1 + ; LA64-NEXT: addi.w $a7, $a3, 0 +@@ -284,7 +284,7 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { + ; LA64-NEXT: b .LBB5_6 + ; LA64-NEXT: .LBB5_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB5_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1 + ; LA64-NEXT: addi.w $a7, $a3, 0 +@@ -329,7 +329,7 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { + ; LA64-NEXT: b .LBB6_6 + ; LA64-NEXT: .LBB6_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB6_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 +@@ -372,7 +372,7 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { + ; LA64-NEXT: b .LBB7_6 + ; LA64-NEXT: .LBB7_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB7_1 Depth=1 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB7_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB7_1 Depth=1 + ; LA64-NEXT: bne $a2, $a3, .LBB7_1 +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll 
b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +index 1ac20d10e587..4f25a1d69af1 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +@@ -27,7 +27,7 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { + ; LA64-NEXT: beqz $a5, .LBB0_1 + ; LA64-NEXT: b .LBB0_4 + ; LA64-NEXT: .LBB0_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB0_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire +@@ -61,7 +61,7 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind + ; LA64-NEXT: beqz $a5, .LBB1_1 + ; LA64-NEXT: b .LBB1_4 + ; LA64-NEXT: .LBB1_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB1_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire +@@ -80,7 +80,7 @@ define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind + ; LA64-NEXT: beqz $a4, .LBB2_1 + ; LA64-NEXT: b .LBB2_4 + ; LA64-NEXT: .LBB2_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB2_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire +@@ -99,7 +99,7 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind + ; LA64-NEXT: beqz $a4, .LBB3_1 + ; LA64-NEXT: b .LBB3_4 + ; LA64-NEXT: .LBB3_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB3_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire +@@ -132,7 +132,7 @@ define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind + ; LA64-NEXT: beqz $a6, .LBB4_1 + ; LA64-NEXT: b .LBB4_4 + ; LA64-NEXT: .LBB4_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB4_4: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret +@@ -168,7 +168,7 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou + ; LA64-NEXT: beqz $a6, .LBB5_1 + ; LA64-NEXT: b .LBB5_4 + ; LA64-NEXT: .LBB5_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB5_4: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret +@@ -189,7 +189,7 @@ define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nou + ; LA64-NEXT: beqz $a4, .LBB6_1 + ; LA64-NEXT: b .LBB6_4 + ; LA64-NEXT: .LBB6_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB6_4: + ; LA64-NEXT: move $a0, $a3 + ; LA64-NEXT: ret +@@ -210,7 +210,7 @@ define i64 @cmpxchg_i64_acquire_acquire_reti64(ptr %ptr, i64 %cmp, i64 %val) nou + ; LA64-NEXT: beqz $a4, .LBB7_1 + ; LA64-NEXT: b .LBB7_4 + ; LA64-NEXT: .LBB7_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB7_4: + ; LA64-NEXT: move $a0, $a3 + ; LA64-NEXT: ret +@@ -245,7 +245,7 @@ define i1 @cmpxchg_i8_acquire_acquire_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind + ; LA64-NEXT: beqz $a6, .LBB8_1 + ; LA64-NEXT: b .LBB8_4 + ; LA64-NEXT: .LBB8_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB8_4: + ; LA64-NEXT: and $a0, $a5, $a4 + ; LA64-NEXT: addi.w $a0, $a0, 0 +@@ -284,7 +284,7 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw + ; LA64-NEXT: beqz $a6, .LBB9_1 + ; LA64-NEXT: b .LBB9_4 + ; LA64-NEXT: .LBB9_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB9_4: + ; LA64-NEXT: and $a0, $a5, $a4 + ; LA64-NEXT: addi.w $a0, $a0, 0 +@@ -308,7 +308,7 @@ define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounw + ; LA64-NEXT: beqz $a4, .LBB10_1 + 
; LA64-NEXT: b .LBB10_4 + ; LA64-NEXT: .LBB10_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB10_4: + ; LA64-NEXT: addi.w $a0, $a1, 0 + ; LA64-NEXT: xor $a0, $a3, $a0 +@@ -331,7 +331,7 @@ define i1 @cmpxchg_i64_acquire_acquire_reti1(ptr %ptr, i64 %cmp, i64 %val) nounw + ; LA64-NEXT: beqz $a4, .LBB11_1 + ; LA64-NEXT: b .LBB11_4 + ; LA64-NEXT: .LBB11_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB11_4: + ; LA64-NEXT: xor $a0, $a3, $a1 + ; LA64-NEXT: sltui $a0, $a0, 1 +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll +index 02d481cb3865..589360823b14 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll +@@ -29,7 +29,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB0_6 + ; LA64F-NEXT: .LBB0_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB0_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -64,7 +64,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB0_6 + ; LA64D-NEXT: .LBB0_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB0_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -103,7 +103,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB1_6 + ; LA64F-NEXT: .LBB1_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB1_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB1_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB1_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -138,7 +138,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB1_6 + ; LA64D-NEXT: .LBB1_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB1_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -178,7 +178,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB2_6 + ; LA64F-NEXT: .LBB2_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB2_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB2_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB2_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -214,7 +214,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB2_6 + ; LA64D-NEXT: .LBB2_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB2_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB2_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB2_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -254,7 +254,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB3_6 + ; LA64F-NEXT: .LBB3_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB3_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB3_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB3_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -290,7 +290,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB3_6 + ; LA64D-NEXT: .LBB3_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: 
Header=BB3_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB3_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB3_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -1385,7 +1385,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB16_6 + ; LA64F-NEXT: .LBB16_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB16_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB16_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB16_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -1420,7 +1420,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB16_6 + ; LA64D-NEXT: .LBB16_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB16_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB16_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB16_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -1459,7 +1459,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB17_6 + ; LA64F-NEXT: .LBB17_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB17_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB17_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB17_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -1494,7 +1494,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB17_6 + ; LA64D-NEXT: .LBB17_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB17_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB17_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB17_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -1534,7 +1534,7 @@ define float @float_fmin_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB18_6 + ; LA64F-NEXT: .LBB18_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB18_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB18_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB18_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -1570,7 +1570,7 @@ define float @float_fmin_acq_rel(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB18_6 + ; LA64D-NEXT: .LBB18_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB18_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB18_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB18_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -1610,7 +1610,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB19_6 + ; LA64F-NEXT: .LBB19_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB19_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB19_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB19_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -1646,7 +1646,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB19_6 + ; LA64D-NEXT: .LBB19_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB19_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB19_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB19_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -2087,7 +2087,7 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB24_6 + ; LA64F-NEXT: .LBB24_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB24_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB24_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB24_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w 
$fa0, $a3 +@@ -2122,7 +2122,7 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB24_6 + ; LA64D-NEXT: .LBB24_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB24_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB24_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB24_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -2161,7 +2161,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB25_6 + ; LA64F-NEXT: .LBB25_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB25_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB25_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB25_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -2196,7 +2196,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB25_6 + ; LA64D-NEXT: .LBB25_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB25_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB25_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB25_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -2236,7 +2236,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB26_6 + ; LA64F-NEXT: .LBB26_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB26_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB26_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB26_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -2272,7 +2272,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB26_6 + ; LA64D-NEXT: .LBB26_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB26_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB26_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB26_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -2312,7 +2312,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB27_6 + ; LA64F-NEXT: .LBB27_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB27_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB27_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB27_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -2348,7 +2348,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB27_6 + ; LA64D-NEXT: .LBB27_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB27_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB27_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB27_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll +index 8d6056bc7677..a8b164a4cd3c 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll +@@ -5,12 +5,12 @@ + define void @fence_singlethread() { + ; LA32-LABEL: fence_singlethread: + ; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: #MEMBARRIER + ; LA32-NEXT: ret + ; + ; LA64-LABEL: fence_singlethread: + ; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: #MEMBARRIER + ; LA64-NEXT: ret + fence syncscope("singlethread") seq_cst + ret void +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll +index 724639f3c6fb..c5b2232f9b80 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll ++++ 
b/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll +@@ -5,12 +5,12 @@ + define void @fence_acquire() nounwind { + ; LA32-LABEL: fence_acquire: + ; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 20 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: fence_acquire: + ; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: ret + fence acquire + ret void +@@ -19,12 +19,12 @@ define void @fence_acquire() nounwind { + define void @fence_release() nounwind { + ; LA32-LABEL: fence_release: + ; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 18 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: fence_release: + ; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 18 + ; LA64-NEXT: ret + fence release + ret void +@@ -33,12 +33,12 @@ define void @fence_release() nounwind { + define void @fence_acq_rel() nounwind { + ; LA32-LABEL: fence_acq_rel: + ; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 16 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: fence_acq_rel: + ; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 16 + ; LA64-NEXT: ret + fence acq_rel + ret void +@@ -47,12 +47,12 @@ define void @fence_acq_rel() nounwind { + define void @fence_seq_cst() nounwind { + ; LA32-LABEL: fence_seq_cst: + ; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 16 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: fence_seq_cst: + ; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 16 + ; LA64-NEXT: ret + fence seq_cst + ret void +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll +index deff11723d27..8b170c479eed 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll +@@ -6,13 +6,13 @@ define i8 @load_acquire_i8(ptr %ptr) { + ; LA32-LABEL: load_acquire_i8: + ; LA32: # %bb.0: + ; LA32-NEXT: ld.b $a0, $a0, 0 +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 20 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: load_acquire_i8: + ; LA64: # %bb.0: + ; LA64-NEXT: ld.b $a0, $a0, 0 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: ret + %val = load atomic i8, ptr %ptr acquire, align 1 + ret i8 %val +@@ -22,13 +22,13 @@ define i16 @load_acquire_i16(ptr %ptr) { + ; LA32-LABEL: load_acquire_i16: + ; LA32: # %bb.0: + ; LA32-NEXT: ld.h $a0, $a0, 0 +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 20 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: load_acquire_i16: + ; LA64: # %bb.0: + ; LA64-NEXT: ld.h $a0, $a0, 0 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: ret + %val = load atomic i16, ptr %ptr acquire, align 2 + ret i16 %val +@@ -38,13 +38,13 @@ define i32 @load_acquire_i32(ptr %ptr) { + ; LA32-LABEL: load_acquire_i32: + ; LA32: # %bb.0: + ; LA32-NEXT: ld.w $a0, $a0, 0 +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 20 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: load_acquire_i32: + ; LA64: # %bb.0: + ; LA64-NEXT: ld.w $a0, $a0, 0 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: ret + %val = load atomic i32, ptr %ptr acquire, align 4 + ret i32 %val +@@ -66,7 +66,7 @@ define i64 @load_acquire_i64(ptr %ptr) { + ; LA64-LABEL: load_acquire_i64: + ; LA64: # %bb.0: + ; LA64-NEXT: ld.d $a0, $a0, 0 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: ret + %val = load atomic i64, ptr %ptr acquire, align 8 + ret i64 %val +@@ -202,13 +202,13 @@ define i8 @load_seq_cst_i8(ptr %ptr) { + ; LA32-LABEL: load_seq_cst_i8: + ; LA32: # %bb.0: + ; LA32-NEXT: ld.b $a0, $a0, 0 +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 16 + ; LA32-NEXT: 
ret + ; + ; LA64-LABEL: load_seq_cst_i8: + ; LA64: # %bb.0: + ; LA64-NEXT: ld.b $a0, $a0, 0 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 16 + ; LA64-NEXT: ret + %val = load atomic i8, ptr %ptr seq_cst, align 1 + ret i8 %val +@@ -218,13 +218,13 @@ define i16 @load_seq_cst_i16(ptr %ptr) { + ; LA32-LABEL: load_seq_cst_i16: + ; LA32: # %bb.0: + ; LA32-NEXT: ld.h $a0, $a0, 0 +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 16 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: load_seq_cst_i16: + ; LA64: # %bb.0: + ; LA64-NEXT: ld.h $a0, $a0, 0 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 16 + ; LA64-NEXT: ret + %val = load atomic i16, ptr %ptr seq_cst, align 2 + ret i16 %val +@@ -234,13 +234,13 @@ define i32 @load_seq_cst_i32(ptr %ptr) { + ; LA32-LABEL: load_seq_cst_i32: + ; LA32: # %bb.0: + ; LA32-NEXT: ld.w $a0, $a0, 0 +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 16 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: load_seq_cst_i32: + ; LA64: # %bb.0: + ; LA64-NEXT: ld.w $a0, $a0, 0 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 16 + ; LA64-NEXT: ret + %val = load atomic i32, ptr %ptr seq_cst, align 4 + ret i32 %val +@@ -262,7 +262,7 @@ define i64 @load_seq_cst_i64(ptr %ptr) { + ; LA64-LABEL: load_seq_cst_i64: + ; LA64: # %bb.0: + ; LA64-NEXT: ld.d $a0, $a0, 0 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 16 + ; LA64-NEXT: ret + %val = load atomic i64, ptr %ptr seq_cst, align 8 + ret i64 %val +@@ -271,13 +271,13 @@ define i64 @load_seq_cst_i64(ptr %ptr) { + define void @store_release_i8(ptr %ptr, i8 signext %v) { + ; LA32-LABEL: store_release_i8: + ; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 18 + ; LA32-NEXT: st.b $a1, $a0, 0 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: store_release_i8: + ; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 18 + ; LA64-NEXT: st.b $a1, $a0, 0 + ; LA64-NEXT: ret + store atomic i8 %v, ptr %ptr release, align 1 +@@ -287,13 +287,13 @@ define void @store_release_i8(ptr %ptr, i8 signext %v) { + define void @store_release_i16(ptr %ptr, i16 signext %v) { + ; LA32-LABEL: store_release_i16: + ; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 18 + ; LA32-NEXT: st.h $a1, $a0, 0 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: store_release_i16: + ; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 18 + ; LA64-NEXT: st.h $a1, $a0, 0 + ; LA64-NEXT: ret + store atomic i16 %v, ptr %ptr release, align 2 +@@ -303,7 +303,7 @@ define void @store_release_i16(ptr %ptr, i16 signext %v) { + define void @store_release_i32(ptr %ptr, i32 signext %v) { + ; LA32-LABEL: store_release_i32: + ; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 18 + ; LA32-NEXT: st.w $a1, $a0, 0 + ; LA32-NEXT: ret + ; +@@ -465,16 +465,16 @@ define void @store_monotonic_i64(ptr %ptr, i64 %v) { + define void @store_seq_cst_i8(ptr %ptr, i8 signext %v) { + ; LA32-LABEL: store_seq_cst_i8: + ; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 16 + ; LA32-NEXT: st.b $a1, $a0, 0 +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 16 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: store_seq_cst_i8: + ; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 16 + ; LA64-NEXT: st.b $a1, $a0, 0 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 16 + ; LA64-NEXT: ret + store atomic i8 %v, ptr %ptr seq_cst, align 1 + ret void +@@ -483,16 +483,16 @@ define void @store_seq_cst_i8(ptr %ptr, i8 signext %v) { + define void @store_seq_cst_i16(ptr %ptr, i16 signext %v) { + ; LA32-LABEL: store_seq_cst_i16: + ; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 16 + ; LA32-NEXT: st.h $a1, $a0, 0 +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 16 + ; LA32-NEXT: ret + ; + 
; LA64-LABEL: store_seq_cst_i16: + ; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 16 + ; LA64-NEXT: st.h $a1, $a0, 0 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 16 + ; LA64-NEXT: ret + store atomic i16 %v, ptr %ptr seq_cst, align 2 + ret void +@@ -501,9 +501,9 @@ define void @store_seq_cst_i16(ptr %ptr, i16 signext %v) { + define void @store_seq_cst_i32(ptr %ptr, i32 signext %v) { + ; LA32-LABEL: store_seq_cst_i32: + ; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 16 + ; LA32-NEXT: st.w $a1, $a0, 0 +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 16 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: store_seq_cst_i32: +-- +2.20.1 + diff --git a/0003-LowerTypeTests-Add-loongarch64-to-CFI-jumptables-673.patch b/0003-LowerTypeTests-Add-loongarch64-to-CFI-jumptables-673.patch new file mode 100644 index 0000000..d5ff1a4 --- /dev/null +++ b/0003-LowerTypeTests-Add-loongarch64-to-CFI-jumptables-673.patch @@ -0,0 +1,127 @@ +From 866e3ec267eb98445441ba8d9e9fb2129a23c4a1 Mon Sep 17 00:00:00 2001 +From: Ami-zhang <96056515+Ami-zhang@users.noreply.github.com> +Date: Thu, 28 Sep 2023 15:26:18 +0800 +Subject: [PATCH 03/27] [LowerTypeTests] Add loongarch64 to CFI jumptables + (#67312) + +This patch implements jump tables for loongarch64. + +(cherry picked from commit 0e8a8c85f8765c086c573f36e60c895920381e18) +--- + llvm/lib/Transforms/IPO/LowerTypeTests.cpp | 9 ++++++++- + llvm/test/Transforms/LowerTypeTests/function-weak.ll | 2 ++ + llvm/test/Transforms/LowerTypeTests/function.ll | 9 +++++++++ + 3 files changed, 19 insertions(+), 1 deletion(-) + +diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +index 9b4b3efd7283..a89d57d12615 100644 +--- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp ++++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +@@ -1196,6 +1196,7 @@ static const unsigned kARMJumpTableEntrySize = 4; + static const unsigned kARMBTIJumpTableEntrySize = 8; + static const unsigned kARMv6MJumpTableEntrySize = 16; + static const unsigned kRISCVJumpTableEntrySize = 8; ++static const unsigned kLOONGARCH64JumpTableEntrySize = 8; + + unsigned LowerTypeTestsModule::getJumpTableEntrySize() { + switch (JumpTableArch) { +@@ -1222,6 +1223,8 @@ unsigned LowerTypeTestsModule::getJumpTableEntrySize() { + case Triple::riscv32: + case Triple::riscv64: + return kRISCVJumpTableEntrySize; ++ case Triple::loongarch64: ++ return kLOONGARCH64JumpTableEntrySize; + default: + report_fatal_error("Unsupported architecture for jump tables"); + } +@@ -1286,6 +1289,9 @@ void LowerTypeTestsModule::createJumpTableEntry( + } else if (JumpTableArch == Triple::riscv32 || + JumpTableArch == Triple::riscv64) { + AsmOS << "tail $" << ArgIndex << "@plt\n"; ++ } else if (JumpTableArch == Triple::loongarch64) { ++ AsmOS << "pcalau12i $$t0, %pc_hi20($" << ArgIndex << ")\n" ++ << "jirl $$r0, $$t0, %pc_lo12($" << ArgIndex << ")\n"; + } else { + report_fatal_error("Unsupported architecture for jump tables"); + } +@@ -1304,7 +1310,8 @@ void LowerTypeTestsModule::buildBitSetsFromFunctions( + ArrayRef TypeIds, ArrayRef Functions) { + if (Arch == Triple::x86 || Arch == Triple::x86_64 || Arch == Triple::arm || + Arch == Triple::thumb || Arch == Triple::aarch64 || +- Arch == Triple::riscv32 || Arch == Triple::riscv64) ++ Arch == Triple::riscv32 || Arch == Triple::riscv64 || ++ Arch == Triple::loongarch64) + buildBitSetsFromFunctionsNative(TypeIds, Functions); + else if (Arch == Triple::wasm32 || Arch == Triple::wasm64) + buildBitSetsFromFunctionsWASM(TypeIds, Functions); +diff --git 
a/llvm/test/Transforms/LowerTypeTests/function-weak.ll b/llvm/test/Transforms/LowerTypeTests/function-weak.ll +index ff69abacc8e9..c765937f1991 100644 +--- a/llvm/test/Transforms/LowerTypeTests/function-weak.ll ++++ b/llvm/test/Transforms/LowerTypeTests/function-weak.ll +@@ -4,6 +4,7 @@ + ; RUN: opt -S -passes=lowertypetests -mtriple=aarch64-unknown-linux-gnu %s | FileCheck --check-prefixes=CHECK,ARM %s + ; RUN: opt -S -passes=lowertypetests -mtriple=riscv32-unknown-linux-gnu %s | FileCheck --check-prefixes=CHECK,RISCV %s + ; RUN: opt -S -passes=lowertypetests -mtriple=riscv64-unknown-linux-gnu %s | FileCheck --check-prefixes=CHECK,RISCV %s ++; RUN: opt -S -passes=lowertypetests -mtriple=loongarch64-unknown-linux-gnu %s | FileCheck --check-prefixes=CHECK,LOONGARCH64 %s + + target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + target triple = "x86_64-unknown-linux-gnu" +@@ -116,6 +117,7 @@ define i1 @foo(ptr %p) { + ; X86: define private void @[[JT]]() #{{.*}} align 8 { + ; ARM: define private void @[[JT]]() #{{.*}} align 4 { + ; RISCV: define private void @[[JT]]() #{{.*}} align 8 { ++; LOONGARCH64: define private void @[[JT]]() #{{.*}} align 8 { + + ; CHECK: define internal void @__cfi_global_var_init() section ".text.startup" { + ; CHECK-NEXT: entry: +diff --git a/llvm/test/Transforms/LowerTypeTests/function.ll b/llvm/test/Transforms/LowerTypeTests/function.ll +index 968c9d434eb2..802b88d92977 100644 +--- a/llvm/test/Transforms/LowerTypeTests/function.ll ++++ b/llvm/test/Transforms/LowerTypeTests/function.ll +@@ -5,6 +5,7 @@ + ; RUN: opt -S -passes=lowertypetests -mtriple=riscv32-unknown-linux-gnu %s | FileCheck --check-prefixes=RISCV,NATIVE %s + ; RUN: opt -S -passes=lowertypetests -mtriple=riscv64-unknown-linux-gnu %s | FileCheck --check-prefixes=RISCV,NATIVE %s + ; RUN: opt -S -passes=lowertypetests -mtriple=wasm32-unknown-unknown %s | FileCheck --check-prefix=WASM32 %s ++; RUN: opt -S -passes=lowertypetests -mtriple=loongarch64-unknown-linux-gnu %s | FileCheck --check-prefixes=LOONGARCH64,NATIVE %s + + ; The right format for Arm jump tables depends on the selected + ; subtarget, so we can't get these tests right without the Arm target +@@ -34,6 +35,7 @@ target datalayout = "e-p:64:64" + ; THUMB: @g = internal alias void (), getelementptr inbounds ([2 x [4 x i8]], ptr @[[JT]], i64 0, i64 1) + ; THUMBV6M: @g = internal alias void (), getelementptr inbounds ([2 x [16 x i8]], ptr @[[JT]], i64 0, i64 1) + ; RISCV: @g = internal alias void (), getelementptr inbounds ([2 x [8 x i8]], ptr @[[JT]], i64 0, i64 1) ++; LOONGARCH64: @g = internal alias void (), getelementptr inbounds ([2 x [8 x i8]], ptr @[[JT]], i64 0, i64 1) + + ; NATIVE: define hidden void @f.cfi() + ; WASM32: define void @f() !type !{{[0-9]+}} !wasm.index ![[I0:[0-9]+]] +@@ -65,6 +67,7 @@ define i1 @foo(ptr %p) { + ; THUMB: define private void @[[JT]]() #[[ATTR:.*]] align 4 { + ; THUMBV6M: define private void @[[JT]]() #[[ATTR:.*]] align 16 { + ; RISCV: define private void @[[JT]]() #[[ATTR:.*]] align 8 { ++; LOONGARCH64: define private void @[[JT]]() #[[ATTR:.*]] align 8 { + + ; X86: jmp ${0:c}@plt + ; X86-SAME: int3 +@@ -99,6 +102,11 @@ define i1 @foo(ptr %p) { + ; RISCV: tail $0@plt + ; RISCV-SAME: tail $1@plt + ++; LOONGARCH64: pcalau12i $$t0, %pc_hi20($0) ++; LOONGARCH64-SAME: jirl $$r0, $$t0, %pc_lo12($0) ++; LOONGARCH64-SAME: pcalau12i $$t0, %pc_hi20($1) ++; LOONGARCH64-SAME: jirl $$r0, $$t0, %pc_lo12($1) ++ + ; NATIVE-SAME: "s,s"(ptr @f.cfi, ptr @g.cfi) + + ; X86-LINUX: attributes #[[ATTR]] = { naked 
nocf_check nounwind } +@@ -107,6 +115,7 @@ define i1 @foo(ptr %p) { + ; THUMB: attributes #[[ATTR]] = { naked nounwind "target-cpu"="cortex-a8" "target-features"="+thumb-mode" } + ; THUMBV6M: attributes #[[ATTR]] = { naked nounwind "target-features"="+thumb-mode" } + ; RISCV: attributes #[[ATTR]] = { naked nounwind "target-features"="-c,-relax" } ++; LOONGARCH64: attributes #[[ATTR]] = { naked nounwind } + + ; WASM32: ![[I0]] = !{i64 1} + ; WASM32: ![[I1]] = !{i64 2} +-- +2.20.1 + diff --git a/0003-PATCH-clang-Don-t-install-static-libraries.patch b/0003-PATCH-clang-Don-t-install-static-libraries.patch new file mode 100644 index 0000000..bc8fa51 --- /dev/null +++ b/0003-PATCH-clang-Don-t-install-static-libraries.patch @@ -0,0 +1,25 @@ +From 88704fc2eabb9dd19a9c3eb81a9b3dc37d95651c Mon Sep 17 00:00:00 2001 +From: Tom Stellard +Date: Fri, 31 Jan 2020 11:04:57 -0800 +Subject: [PATCH][clang] Don't install static libraries + +--- + clang/cmake/modules/AddClang.cmake | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/clang/cmake/modules/AddClang.cmake b/clang/cmake/modules/AddClang.cmake +index 5752f4277444..0f52822d91f0 100644 +--- a/clang/cmake/modules/AddClang.cmake ++++ b/clang/cmake/modules/AddClang.cmake +@@ -113,7 +113,7 @@ macro(add_clang_library name) + if(TARGET ${lib}) + target_link_libraries(${lib} INTERFACE ${LLVM_COMMON_LIBS}) + +- if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY OR ARG_INSTALL_WITH_TOOLCHAIN) ++ if (ARG_SHARED AND (NOT LLVM_INSTALL_TOOLCHAIN_ONLY OR ARG_INSTALL_WITH_TOOLCHAIN)) + get_target_export_arg(${name} Clang export_to_clangtargets UMBRELLA clang-libraries) + install(TARGETS ${lib} + COMPONENT ${lib} +-- +2.30.2 + diff --git a/0003-lld-LoongArch-Support-the-R_LARCH_CALL36-relocation-.patch b/0003-lld-LoongArch-Support-the-R_LARCH_CALL36-relocation-.patch new file mode 100644 index 0000000..28aafa7 --- /dev/null +++ b/0003-lld-LoongArch-Support-the-R_LARCH_CALL36-relocation-.patch @@ -0,0 +1,136 @@ +From 2a451ca1c5ab4294e8ab876e4551c3b037d5c997 Mon Sep 17 00:00:00 2001 +From: Lu Weining +Date: Mon, 25 Dec 2023 17:40:48 +0800 +Subject: [PATCH 03/23] [lld][LoongArch] Support the R_LARCH_CALL36 relocation + type (#73346) + +R_LARCH_CALL36 was designed for function call on medium code model where +the 2 instructions (pcaddu18i + jirl) must be adjacent. This is expected +to replace current medium code model implementation, i.e. +R_LARCH_PCALA_{HI20,LO12} on pcalau12i + jirl. + +See https://github.com/loongson/la-abi-specs/pull/3 for more details. + +(cherry picked from commit 88548df0fc08364bd03148c936e36f0bb07dde8a) +--- + lld/ELF/Arch/LoongArch.cpp | 20 ++++++++++ + lld/test/ELF/loongarch-call36.s | 69 +++++++++++++++++++++++++++++++++ + 2 files changed, 89 insertions(+) + create mode 100644 lld/test/ELF/loongarch-call36.s + +diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp +index 160fab4aeba9..72d9c6838e31 100644 +--- a/lld/ELF/Arch/LoongArch.cpp ++++ b/lld/ELF/Arch/LoongArch.cpp +@@ -479,6 +479,7 @@ RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s, + case R_LARCH_B16: + case R_LARCH_B21: + case R_LARCH_B26: ++ case R_LARCH_CALL36: + return R_PLT_PC; + case R_LARCH_GOT_PC_HI20: + case R_LARCH_GOT64_PC_LO20: +@@ -607,6 +608,25 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel, + write32le(loc, setD10k16(read32le(loc), val >> 2)); + return; + ++ case R_LARCH_CALL36: { ++ // This relocation is designed for adjancent pcaddu18i+jirl pairs that ++ // are patched in one time. 
Because of sign extension of these insns' ++ // immediate fields, the relocation range is [-128G - 0x20000, +128G - ++ // 0x20000) (of course must be 4-byte aligned). ++ if (((int64_t)val + 0x20000) != llvm::SignExtend64(val + 0x20000, 38)) ++ reportRangeError(loc, rel, Twine(val), llvm::minIntN(38) - 0x20000, ++ llvm::maxIntN(38) - 0x20000); ++ checkAlignment(loc, val, 4, rel); ++ // Since jirl performs sign extension on the offset immediate, adds (1<<17) ++ // to original val to get the correct hi20. ++ uint32_t hi20 = extractBits(val + (1 << 17), 37, 18); ++ // Despite the name, the lower part is actually 18 bits with 4-byte aligned. ++ uint32_t lo16 = extractBits(val, 17, 2); ++ write32le(loc, setJ20(read32le(loc), hi20)); ++ write32le(loc + 4, setK16(read32le(loc + 4), lo16)); ++ return; ++ } ++ + // Relocs intended for `addi`, `ld` or `st`. + case R_LARCH_PCALA_LO12: + // We have to again inspect the insn word to handle the R_LARCH_PCALA_LO12 +diff --git a/lld/test/ELF/loongarch-call36.s b/lld/test/ELF/loongarch-call36.s +new file mode 100644 +index 000000000000..2d25a2ac64ed +--- /dev/null ++++ b/lld/test/ELF/loongarch-call36.s +@@ -0,0 +1,69 @@ ++# REQUIRES: loongarch ++ ++# RUN: rm -rf %t && split-file %s %t ++# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-elf %t/a.s -o %t/a.o ++ ++# RUN: ld.lld %t/a.o --section-start=.text=0x20010 --section-start=.sec.foo=0x60020 -o %t/exe1 ++# RUN: llvm-objdump --no-show-raw-insn -d %t/exe1 | FileCheck --match-full-lines %s --check-prefix=EXE1 ++## hi20 = target - pc + (1 << 17) >> 18 = 0x60020 - 0x20010 + 0x20000 >> 18 = 1 ++## lo18 = target - pc & (1 << 18) - 1 = 0x60020 - 0x20010 & 0x3ffff = 16 ++# EXE1: 20010: pcaddu18i $t0, 1 ++# EXE1-NEXT: 20014: jirl $zero, $t0, 16 ++ ++# RUN: ld.lld %t/a.o --section-start=.text=0x20010 --section-start=.sec.foo=0x40020 -o %t/exe2 ++# RUN: llvm-objdump --no-show-raw-insn -d %t/exe2 | FileCheck --match-full-lines %s --check-prefix=EXE2 ++## hi20 = target - pc + (1 << 17) >> 18 = 0x40020 - 0x20010 + 0x20000 >> 18 = 1 ++## lo18 = target - pc & (1 << 18) - 1 = 0x40020 - 0x20010 & 0x3ffff = -131056 ++# EXE2: 20010: pcaddu18i $t0, 1 ++# EXE2-NEXT: 20014: jirl $zero, $t0, -131056 ++ ++# RUN: ld.lld %t/a.o -shared -T %t/a.t -o %t/a.so ++# RUN: llvm-readelf -x .got.plt %t/a.so | FileCheck --check-prefix=GOTPLT %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t/a.so | FileCheck --check-prefix=SO %s ++## PLT should be present in this case. ++# SO: Disassembly of section .plt: ++# SO: <.plt>: ++## foo@plt: ++# SO: 1234520: pcaddu12i $t3, 64{{$}} ++# SO-NEXT: ld.d $t3, $t3, 544{{$}} ++# SO-NEXT: jirl $t1, $t3, 0 ++# SO-NEXT: nop ++ ++# SO: Disassembly of section .text: ++# SO: <_start>: ++## hi20 = foo@plt - pc + (1 << 17) >> 18 = 0x1234520 - 0x1274670 + 0x20000 >> 18 = -1 ++## lo18 = foo@plt - pc & (1 << 18) - 1 = 0x1234520 - 0x1274670 & 0x3ffff = -336 ++# SO-NEXT: pcaddu18i $t0, -1{{$}} ++# SO-NEXT: jirl $zero, $t0, -336{{$}} ++ ++# GOTPLT: section '.got.plt': ++# GOTPLT-NEXT: 0x01274730 00000000 00000000 00000000 00000000 ++# GOTPLT-NEXT: 0x01274740 00452301 00000000 ++ ++# RUN: not ld.lld %t/a.o --section-start=.text=0x20000 --section-start=.sec.foo=0x2000020000 -o /dev/null 2>&1 | \ ++# RUN: FileCheck -DFILE=%t/a.o --check-prefix=ERROR-RANGE %s ++# ERROR-RANGE: error: [[FILE]]:(.text+0x0): relocation R_LARCH_CALL36 out of range: 137438953472 is not in [-137439084544, 137438822399]; references 'foo' ++ ++## Impossible case in reality becasue all LoongArch instructions are fixed 4-bytes long. 
++# RUN: not ld.lld %t/a.o --section-start=.text=0x20000 --section-start=.sec.foo=0x40001 -o /dev/null 2>&1 | \ ++# RUN: FileCheck -DFILE=%t/a.o --check-prefix=ERROR-ALIGN %s ++# ERROR-ALIGN: error: [[FILE]]:(.text+0x0): improper alignment for relocation R_LARCH_CALL36: 0x20001 is not aligned to 4 bytes ++ ++#--- a.t ++SECTIONS { ++ .plt 0x1234500: { *(.plt) } ++ .text 0x1274670: { *(.text) } ++} ++ ++#--- a.s ++.text ++.global _start ++_start: ++ .reloc ., R_LARCH_CALL36, foo ++ pcaddu18i $t0, 0 ++ jirl $zero, $t0, 0 ++ ++.section .sec.foo,"ax" ++.global foo ++foo: ++ ret +-- +2.20.1 + diff --git a/0004-CFI-Allow-LoongArch-67314.patch b/0004-CFI-Allow-LoongArch-67314.patch new file mode 100644 index 0000000..3237b65 --- /dev/null +++ b/0004-CFI-Allow-LoongArch-67314.patch @@ -0,0 +1,100 @@ +From 9e977e153a4b7d69fe9e2d6b6defa7f3bb518b75 Mon Sep 17 00:00:00 2001 +From: Ami-zhang <96056515+Ami-zhang@users.noreply.github.com> +Date: Thu, 28 Sep 2023 15:40:42 +0800 +Subject: [PATCH 04/27] [CFI] Allow LoongArch (#67314) + +Enable icall tests on loongarch64 and `check-cfi` all pass. + +(cherry picked from commit adb555ea369a3a989a9db619c784aa76cccdb823) +--- + compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake | 2 +- + compiler-rt/lib/cfi/cfi.cpp | 4 ++++ + compiler-rt/test/cfi/cross-dso/icall/dlopen.cpp | 11 +++++++++-- + compiler-rt/test/cfi/cross-dso/icall/lit.local.cfg.py | 2 +- + compiler-rt/test/cfi/icall/lit.local.cfg.py | 2 +- + 5 files changed, 16 insertions(+), 5 deletions(-) + +diff --git a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake +index d14745ef9d13..9b0a4655cd65 100644 +--- a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake ++++ b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake +@@ -73,7 +73,7 @@ set(ALL_UBSAN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${RISCV64} + set(ALL_SAFESTACK_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM64} ${MIPS32} ${MIPS64} + ${HEXAGON} ${LOONGARCH64}) + set(ALL_CFI_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${MIPS64} +- ${HEXAGON}) ++ ${HEXAGON} ${LOONGARCH64}) + set(ALL_SCUDO_STANDALONE_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} + ${MIPS32} ${MIPS64} ${PPC64} ${HEXAGON} ${LOONGARCH64} ${RISCV64}) + if(APPLE) +diff --git a/compiler-rt/lib/cfi/cfi.cpp b/compiler-rt/lib/cfi/cfi.cpp +index 22f0b175dd87..ad1c91623514 100644 +--- a/compiler-rt/lib/cfi/cfi.cpp ++++ b/compiler-rt/lib/cfi/cfi.cpp +@@ -51,7 +51,11 @@ using namespace __sanitizer; + + namespace __cfi { + ++#if SANITIZER_LOONGARCH64 ++#define kCfiShadowLimitsStorageSize 16384 // 16KiB on loongarch64 per page ++#else + #define kCfiShadowLimitsStorageSize 4096 // 1 page ++#endif + // Lets hope that the data segment is mapped with 4K pages. + // The pointer to the cfi shadow region is stored at the start of this page. + // The rest of the page is unused and re-mapped read-only. +diff --git a/compiler-rt/test/cfi/cross-dso/icall/dlopen.cpp b/compiler-rt/test/cfi/cross-dso/icall/dlopen.cpp +index c9674c3fb412..d04f7ba5dd0e 100644 +--- a/compiler-rt/test/cfi/cross-dso/icall/dlopen.cpp ++++ b/compiler-rt/test/cfi/cross-dso/icall/dlopen.cpp +@@ -53,6 +53,13 @@ struct A { + virtual void f(); + }; + ++// The page size of LoongArch is 16KiB, aligned to the memory page size. 
++#ifdef __loongarch__ ++# define PAGESIZE 16384 ++#else ++# define PAGESIZE 4096 ++#endif ++ + #ifdef SHARED_LIB + + #include "../../utils.h" +@@ -66,13 +73,13 @@ extern "C" void *create_B() { + return (void *)(new B()); + } + +-extern "C" __attribute__((aligned(4096))) void do_nothing() {} ++extern "C" __attribute__((aligned(PAGESIZE))) void do_nothing() {} + + #else + + void A::f() {} + +-static const int kCodeAlign = 4096; ++static const int kCodeAlign = PAGESIZE; + static const int kCodeSize = 4096; + static char saved_code[kCodeSize]; + static char *real_start; +diff --git a/compiler-rt/test/cfi/cross-dso/icall/lit.local.cfg.py b/compiler-rt/test/cfi/cross-dso/icall/lit.local.cfg.py +index 749c265bbf1c..6e64199ed5c5 100644 +--- a/compiler-rt/test/cfi/cross-dso/icall/lit.local.cfg.py ++++ b/compiler-rt/test/cfi/cross-dso/icall/lit.local.cfg.py +@@ -1,3 +1,3 @@ + # The cfi-icall checker is only supported on x86 and x86_64 for now. +-if config.root.host_arch not in ["x86", "x86_64"]: ++if config.root.host_arch not in ["x86", "x86_64", "loongarch64"]: + config.unsupported = True +diff --git a/compiler-rt/test/cfi/icall/lit.local.cfg.py b/compiler-rt/test/cfi/icall/lit.local.cfg.py +index 749c265bbf1c..6e64199ed5c5 100644 +--- a/compiler-rt/test/cfi/icall/lit.local.cfg.py ++++ b/compiler-rt/test/cfi/icall/lit.local.cfg.py +@@ -1,3 +1,3 @@ + # The cfi-icall checker is only supported on x86 and x86_64 for now. +-if config.root.host_arch not in ["x86", "x86_64"]: ++if config.root.host_arch not in ["x86", "x86_64", "loongarch64"]: + config.unsupported = True +-- +2.20.1 + diff --git a/0004-LoongArch-Add-LSX-intrinsic-testcases.patch b/0004-LoongArch-Add-LSX-intrinsic-testcases.patch new file mode 100644 index 0000000..9b1a6ff --- /dev/null +++ b/0004-LoongArch-Add-LSX-intrinsic-testcases.patch @@ -0,0 +1,9905 @@ +From a818acf6c9a103bbc0af472b54b1d78330e36f79 Mon Sep 17 00:00:00 2001 +From: chenli +Date: Sat, 19 Aug 2023 17:10:41 +0800 +Subject: [PATCH 04/42] [LoongArch] Add LSX intrinsic testcases + +Depends on D155829 + +Reviewed By: SixWeining + +Differential Revision: https://reviews.llvm.org/D155834 + +(cherry picked from commit f3aa4416319aed198841401c6c9dc2e49afe2507) + +--- + .../CodeGen/LoongArch/lsx/intrinsic-absd.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-add.ll | 62 ++ + .../CodeGen/LoongArch/lsx/intrinsic-adda.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-addi.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-addw.ll | 290 ++++++++++ + .../CodeGen/LoongArch/lsx/intrinsic-and.ll | 14 + + .../CodeGen/LoongArch/lsx/intrinsic-andi.ll | 14 + + .../CodeGen/LoongArch/lsx/intrinsic-andn.ll | 14 + + .../CodeGen/LoongArch/lsx/intrinsic-avg.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-avgr.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-bitclr.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-bitrev.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-bitsel.ll | 14 + + .../LoongArch/lsx/intrinsic-bitseli.ll | 14 + + .../CodeGen/LoongArch/lsx/intrinsic-bitset.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-bsll.ll | 14 + + .../CodeGen/LoongArch/lsx/intrinsic-bsrl.ll | 14 + + .../CodeGen/LoongArch/lsx/intrinsic-clo.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-clz.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-div.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-exth.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-extl.ll | 26 + + .../LoongArch/lsx/intrinsic-extrins.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-fadd.ll | 26 + + 
.../CodeGen/LoongArch/lsx/intrinsic-fclass.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-fcmp.ll | 530 ++++++++++++++++++ + .../CodeGen/LoongArch/lsx/intrinsic-fcvt.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-fcvth.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-fcvtl.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-fdiv.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-ffint.ll | 86 +++ + .../CodeGen/LoongArch/lsx/intrinsic-flogb.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-fmadd.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-fmax.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-fmaxa.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-fmin.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-fmina.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-fmsub.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-fmul.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-fnmadd.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-fnmsub.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-frecip.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-frint.ll | 122 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-frsqrt.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-frstp.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-fsqrt.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-fsub.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-ftint.ll | 350 ++++++++++++ + .../CodeGen/LoongArch/lsx/intrinsic-haddw.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-hsubw.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-ilv.ll | 98 ++++ + .../LoongArch/lsx/intrinsic-insgr2vr.ll | 54 ++ + .../CodeGen/LoongArch/lsx/intrinsic-ld.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-ldi.ll | 62 ++ + .../CodeGen/LoongArch/lsx/intrinsic-ldrepl.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-madd.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-maddw.ll | 290 ++++++++++ + .../CodeGen/LoongArch/lsx/intrinsic-max.ll | 194 +++++++ + .../CodeGen/LoongArch/lsx/intrinsic-min.ll | 194 +++++++ + .../CodeGen/LoongArch/lsx/intrinsic-mod.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-mskgez.ll | 14 + + .../CodeGen/LoongArch/lsx/intrinsic-mskltz.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-msknz.ll | 14 + + .../CodeGen/LoongArch/lsx/intrinsic-msub.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-muh.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-mul.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-mulw.ll | 290 ++++++++++ + .../CodeGen/LoongArch/lsx/intrinsic-neg.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-nor.ll | 14 + + .../CodeGen/LoongArch/lsx/intrinsic-nori.ll | 14 + + .../CodeGen/LoongArch/lsx/intrinsic-or.ll | 14 + + .../CodeGen/LoongArch/lsx/intrinsic-ori.ll | 14 + + .../CodeGen/LoongArch/lsx/intrinsic-orn.ll | 14 + + .../CodeGen/LoongArch/lsx/intrinsic-pack.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-pcnt.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-permi.ll | 14 + + .../CodeGen/LoongArch/lsx/intrinsic-pick.ll | 98 ++++ + .../LoongArch/lsx/intrinsic-pickve2gr.ll | 98 ++++ + .../LoongArch/lsx/intrinsic-replgr2vr.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-replve.ll | 50 ++ + .../LoongArch/lsx/intrinsic-replvei.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-rotr.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-sadd.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-sat.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-seq.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-set.ll | 38 ++ + .../LoongArch/lsx/intrinsic-setallnez.ll | 74 +++ + .../LoongArch/lsx/intrinsic-setanyeqz.ll | 74 +++ + 
.../CodeGen/LoongArch/lsx/intrinsic-shuf.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-shuf4i.ll | 50 ++ + .../LoongArch/lsx/intrinsic-signcov.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-sle.ll | 194 +++++++ + .../CodeGen/LoongArch/lsx/intrinsic-sll.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-sllwil.ll | 74 +++ + .../CodeGen/LoongArch/lsx/intrinsic-slt.ll | 194 +++++++ + .../CodeGen/LoongArch/lsx/intrinsic-sra.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-sran.ll | 38 ++ + .../CodeGen/LoongArch/lsx/intrinsic-srani.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-srar.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-srarn.ll | 38 ++ + .../CodeGen/LoongArch/lsx/intrinsic-srarni.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-srl.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-srln.ll | 38 ++ + .../CodeGen/LoongArch/lsx/intrinsic-srlni.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-srlr.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-srlrn.ll | 38 ++ + .../CodeGen/LoongArch/lsx/intrinsic-srlrni.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-ssran.ll | 74 +++ + .../CodeGen/LoongArch/lsx/intrinsic-ssrani.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-ssrarn.ll | 74 +++ + .../LoongArch/lsx/intrinsic-ssrarni.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-ssrln.ll | 74 +++ + .../CodeGen/LoongArch/lsx/intrinsic-ssrlni.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-ssrlrn.ll | 74 +++ + .../LoongArch/lsx/intrinsic-ssrlrni.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-ssub.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-st.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-stelm.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-sub.ll | 62 ++ + .../CodeGen/LoongArch/lsx/intrinsic-subi.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-subw.ll | 194 +++++++ + .../CodeGen/LoongArch/lsx/intrinsic-xor.ll | 14 + + .../CodeGen/LoongArch/lsx/intrinsic-xori.ll | 14 + + 123 files changed, 8902 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-absd.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-add.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-adda.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-addw.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-and.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-andn.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-avg.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-avgr.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitsel.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-clo.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-clz.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-div.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-exth.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-extl.ll + create mode 100644 
llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fadd.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fclass.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcmp.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvt.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvth.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvtl.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fdiv.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ffint.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-flogb.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmadd.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmax.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmaxa.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmin.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmina.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmsub.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmul.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmadd.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmsub.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecip.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-frint.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrt.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsqrt.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsub.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ftint.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-haddw.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-hsubw.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ilv.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-madd.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-maddw.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-max.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-min.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-mod.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskgez.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskltz.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-msknz.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-msub.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-muh.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-mul.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-mulw.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-neg.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-nor.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-or.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori.ll 
+ create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-orn.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-pack.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-pcnt.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-pick.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-replgr2vr.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-replve.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sadd.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-set.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-setallnez.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-setanyeqz.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-signcov.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sran.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarn.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srln.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrn.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssran.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarn.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrln.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrn.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssub.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-st.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sub.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-subw.ll + create mode 100644 
llvm/test/CodeGen/LoongArch/lsx/intrinsic-xor.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori.ll + +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-absd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-absd.ll +new file mode 100644 +index 000000000000..811d9d712de4 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-absd.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vabsd_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vabsd_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vabsd_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vabsd_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vabsd_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vabsd_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vabsd_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vabsd_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vabsd_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vabsd_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vabsd_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vabsd_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vabsd_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vabsd_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vabsd_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vabsd_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call 
<2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-add.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-add.ll +new file mode 100644 +index 000000000000..fac16c8308da +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-add.ll +@@ -0,0 +1,62 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vadd_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vadd_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vadd_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vadd_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vadd_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vadd_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vadd_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vadd_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vadd_q(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vadd_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vadd.q $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-adda.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-adda.ll +new file mode 100644 +index 000000000000..79be0a184bfb +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-adda.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vadda_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vadda_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vadda.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vadda_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vadda_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vadda.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> %va, 
<8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vadda_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vadda_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vadda.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vadda_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vadda_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vadda.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi.ll +new file mode 100644 +index 000000000000..b9134e0724fe +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vaddi_bu(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vaddi_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddi.bu $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> %va, i32 31) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vaddi_hu(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vaddi_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddi.hu $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> %va, i32 31) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vaddi_wu(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vaddi_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddi.wu $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> %va, i32 31) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vaddi_du(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vaddi_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddi.du $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> %va, i32 31) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addw.ll +new file mode 100644 +index 000000000000..086e3bec12d2 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addw.ll +@@ -0,0 +1,290 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vaddwev_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwev_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwev.h.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x 
i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vaddwev_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwev_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwev.w.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vaddwev_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwev_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwev.d.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vaddwev_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwev_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwev.q.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vaddwev_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwev_h_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwev.h.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vaddwev_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwev_w_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwev.w.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vaddwev_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwev_d_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwev.d.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vaddwev_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwev_q_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwev.q.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vaddwev_h_bu_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwev_h_bu_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwev.h.bu.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vaddwev_w_hu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwev_w_hu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwev.w.hu.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 
x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vaddwev_d_wu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwev_d_wu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwev.d.wu.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vaddwev_q_du_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwev_q_du_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwev.q.du.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vaddwod_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwod_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwod.h.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vaddwod_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwod_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwod.w.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vaddwod_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwod_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwod.d.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vaddwod_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwod_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwod.q.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vaddwod_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwod_h_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwod.h.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vaddwod_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwod_w_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwod.w.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vaddwod_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { 
++; CHECK-LABEL: lsx_vaddwod_d_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwod.d.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vaddwod_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwod_q_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwod.q.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vaddwod_h_bu_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwod_h_bu_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwod.h.bu.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vaddwod_w_hu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwod_w_hu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwod.w.hu.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vaddwod_d_wu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwod_d_wu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwod.d.wu.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vaddwod_q_du_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwod_q_du_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwod.q.du.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-and.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-and.ll +new file mode 100644 +index 000000000000..77496239c3a9 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-and.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vand_v(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vand_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi.ll +new file mode 100644 +index 000000000000..9a1c38a641d0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ 
++declare <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vandi_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vandi_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vandi.b $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> %va, i32 1) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andn.ll +new file mode 100644 +index 000000000000..b08c759ecc32 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andn.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vandn_v(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vandn_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avg.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avg.ll +new file mode 100644 +index 000000000000..fb0861f4cd5e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avg.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vavg_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vavg_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vavg.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vavg_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vavg_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vavg.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vavg_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vavg_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vavg.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vavg_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vavg_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vavg.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vavg_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vavg_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vavg.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16>, <8 x i16>) ++ 
++define <8 x i16> @lsx_vavg_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vavg_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vavg.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vavg_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vavg_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vavg.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vavg_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vavg_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vavg.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avgr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avgr.ll +new file mode 100644 +index 000000000000..8bf7d0ed8817 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avgr.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vavgr_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vavgr_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vavgr.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vavgr_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vavgr_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vavgr.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vavgr_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vavgr_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vavgr.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vavgr_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vavgr_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vavgr.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vavgr_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vavgr_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vavgr.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vavgr_hu(<8 x i16> 
%va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vavgr_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vavgr.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vavgr_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vavgr_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vavgr.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vavgr_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vavgr_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vavgr.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr.ll +new file mode 100644 +index 000000000000..f5fba6dbb141 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vbitclr_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vbitclr_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitclr.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vbitclr_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vbitclr_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitclr.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vbitclr_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vbitclr_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitclr.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vbitclr_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vbitclr_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitclr.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbitclri_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vbitclri_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitclri.b $vr0, $vr0, 7 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> %va, i32 7) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vbitclri_h(<8 x i16> %va) 
nounwind { ++; CHECK-LABEL: lsx_vbitclri_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitclri.h $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> %va, i32 15) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vbitclri_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vbitclri_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitclri.w $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> %va, i32 31) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vbitclri_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vbitclri_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitclri.d $vr0, $vr0, 63 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> %va, i32 63) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev.ll +new file mode 100644 +index 000000000000..ad56e88fdb88 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vbitrev_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vbitrev_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitrev.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vbitrev_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vbitrev_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitrev.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vbitrev_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vbitrev_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitrev.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vbitrev_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vbitrev_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitrev.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbitrevi_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vbitrevi_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitrevi.b $vr0, $vr0, 7 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> %va, i32 7) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vbitrevi_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vbitrevi_h: ++; CHECK: # %bb.0: 
# %entry ++; CHECK-NEXT: vbitrevi.h $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> %va, i32 15) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vbitrevi_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vbitrevi_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitrevi.w $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> %va, i32 31) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vbitrevi_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vbitrevi_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitrevi.d $vr0, $vr0, 63 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> %va, i32 63) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitsel.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitsel.ll +new file mode 100644 +index 000000000000..4b4b5ff1fc8c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitsel.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8>, <16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vbitsel_v(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { ++; CHECK-LABEL: lsx_vbitsel_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitsel.v $vr0, $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli.ll +new file mode 100644 +index 000000000000..28d342b5c378 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbitseli_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vbitseli_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitseli.b $vr0, $vr1, 255 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> %va, <16 x i8> %vb, i32 255) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset.ll +new file mode 100644 +index 000000000000..75d98e6f8bce +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vbitset_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vbitset_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitset.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vbitset_h(<8 x 
i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vbitset_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitset.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vbitset_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vbitset_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitset.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vbitset_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vbitset_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitset.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbitseti_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vbitseti_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitseti.b $vr0, $vr0, 7 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> %va, i32 7) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vbitseti_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vbitseti_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitseti.h $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> %va, i32 15) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vbitseti_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vbitseti_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitseti.w $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> %va, i32 31) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vbitseti_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vbitseti_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitseti.d $vr0, $vr0, 63 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> %va, i32 63) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll.ll +new file mode 100644 +index 000000000000..e7eb1cfcb407 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbsll_v(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vbsll_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbsll.v $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> %va, i32 31) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl.ll +new file mode 100644 +index 000000000000..fe0565297641 +--- /dev/null ++++ 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbsrl_v(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vbsrl_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbsrl.v $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> %va, i32 31) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clo.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clo.ll +new file mode 100644 +index 000000000000..c581109f3fd0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clo.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8>) ++ ++define <16 x i8> @lsx_vclo_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vclo_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vclo.b $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> %va) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16>) ++ ++define <8 x i16> @lsx_vclo_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vclo_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vclo.h $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> %va) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32>) ++ ++define <4 x i32> @lsx_vclo_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vclo_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vclo.w $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> %va) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64>) ++ ++define <2 x i64> @lsx_vclo_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vclo_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vclo.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> %va) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clz.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clz.ll +new file mode 100644 +index 000000000000..25c37b64349b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clz.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8>) ++ ++define <16 x i8> @lsx_vclz_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vclz_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vclz.b $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> %va) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16>) ++ ++define <8 x i16> @lsx_vclz_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vclz_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vclz.h $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> %va) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32>) ++ ++define <4 x i32> @lsx_vclz_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vclz_w: ++; 
CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vclz.w $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> %va) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64>) ++ ++define <2 x i64> @lsx_vclz_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vclz_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vclz.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> %va) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-div.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-div.ll +new file mode 100644 +index 000000000000..53166e84d269 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-div.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vdiv_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vdiv_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vdiv.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vdiv_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vdiv_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vdiv.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vdiv_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vdiv_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vdiv.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vdiv_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vdiv_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vdiv.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vdiv_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vdiv_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vdiv.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vdiv_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vdiv_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vdiv.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vdiv_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vdiv_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vdiv.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> 
@llvm.loongarch.lsx.vdiv.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vdiv_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vdiv_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vdiv.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-exth.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-exth.ll +new file mode 100644 +index 000000000000..2f3e891a9eef +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-exth.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8>) ++ ++define <8 x i16> @lsx_vexth_h_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vexth_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vexth.h.b $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> %va) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16>) ++ ++define <4 x i32> @lsx_vexth_w_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vexth_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vexth.w.h $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> %va) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32>) ++ ++define <2 x i64> @lsx_vexth_d_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vexth_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vexth.d.w $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> %va) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64>) ++ ++define <2 x i64> @lsx_vexth_q_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vexth_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vexth.q.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> %va) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8>) ++ ++define <8 x i16> @lsx_vexth_hu_bu(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vexth_hu_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vexth.hu.bu $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> %va) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16>) ++ ++define <4 x i32> @lsx_vexth_wu_hu(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vexth_wu_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vexth.wu.hu $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> %va) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32>) ++ ++define <2 x i64> @lsx_vexth_du_wu(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vexth_du_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vexth.du.wu $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> %va) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64>) ++ ++define <2 x i64> @lsx_vexth_qu_du(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vexth_qu_du: ++; CHECK: # %bb.0: # %entry 
++; CHECK-NEXT: vexth.qu.du $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> %va) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extl.ll +new file mode 100644 +index 000000000000..cbf19e2a3919 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extl.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64>) ++ ++define <2 x i64> @lsx_vextl_q_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vextl_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vextl.q.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> %va) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64>) ++ ++define <2 x i64> @lsx_vextl_qu_du(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vextl_qu_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vextl.qu.du $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> %va) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins.ll +new file mode 100644 +index 000000000000..8f03a2b81291 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vextrins_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vextrins_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vextrins.b $vr0, $vr1, 255 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> %va, <16 x i8> %vb, i32 255) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vextrins_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vextrins_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vextrins.h $vr0, $vr1, 255 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> %va, <8 x i16> %vb, i32 255) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vextrins_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vextrins_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vextrins.w $vr0, $vr1, 255 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> %va, <4 x i32> %vb, i32 255) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vextrins_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vextrins_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vextrins.d $vr0, $vr1, 255 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> %va, <2 x i64> %vb, i32 255) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fadd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fadd.ll +new file mode 100644 +index 000000000000..569002314c92 +--- /dev/null 
++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fadd.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float>, <4 x float>) ++ ++define <4 x float> @lsx_vfadd_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfadd_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfadd.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double>, <2 x double>) ++ ++define <2 x double> @lsx_vfadd_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfadd_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfadd.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fclass.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fclass.ll +new file mode 100644 +index 000000000000..0c6682187101 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fclass.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float>) ++ ++define <4 x i32> @lsx_vfclass_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vfclass_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfclass.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> %va) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double>) ++ ++define <2 x i64> @lsx_vfclass_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vfclass_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfclass.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> %va) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcmp.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcmp.ll +new file mode 100644 +index 000000000000..669c53b73b16 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcmp.ll +@@ -0,0 +1,530 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_caf_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_caf_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.caf.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_caf_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_caf_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.caf.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> 
@lsx_vfcmp_cun_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_cun_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.cun.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_cun_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_cun_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.cun.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_ceq_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_ceq_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.ceq.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_ceq_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_ceq_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.ceq.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_cueq_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_cueq_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.cueq.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_cueq_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_cueq_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.cueq.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_clt_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_clt_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.clt.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_clt_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_clt_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.clt.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_cult_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_cult_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.cult.s $vr0, $vr0, 
$vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_cult_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_cult_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.cult.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_cle_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_cle_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.cle.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_cle_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_cle_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.cle.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_cule_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_cule_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.cule.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_cule_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_cule_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.cule.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_cne_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_cne_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.cne.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_cne_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_cne_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.cne.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_cor_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_cor_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.cor.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ 
++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_cor_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_cor_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.cor.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_cune_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_cune_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.cune.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_cune_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_cune_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.cune.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_saf_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_saf_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.saf.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_saf_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_saf_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.saf.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_sun_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_sun_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.sun.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_sun_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_sun_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.sun.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_seq_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_seq_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.seq.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_seq_d(<2 x double> %va, <2 x double> %vb) nounwind { 
++; CHECK-LABEL: lsx_vfcmp_seq_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.seq.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_sueq_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_sueq_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.sueq.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_sueq_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_sueq_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.sueq.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_slt_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_slt_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.slt.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_slt_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_slt_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.slt.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_sult_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_sult_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.sult.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_sult_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_sult_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.sult.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_sle_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_sle_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.sle.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_sle_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_sle_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.sle.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x 
i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_sule_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_sule_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.sule.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_sule_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_sule_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.sule.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_sne_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_sne_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.sne.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_sne_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_sne_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.sne.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_sor_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_sor_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.sor.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_sor_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_sor_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.sor.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_sune_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_sune_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.sune.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_sune_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_sune_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.sune.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} +diff --git 
a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvt.ll +new file mode 100644 +index 000000000000..a6a151a96d84 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvt.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float>, <4 x float>) ++ ++define <8 x i16> @lsx_vfcvt_h_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcvt_h_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcvt.h.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> %va, <4 x float> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double>, <2 x double>) ++ ++define <4 x float> @lsx_vfcvt_s_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcvt_s_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcvt.s.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> %va, <2 x double> %vb) ++ ret <4 x float> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvth.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvth.ll +new file mode 100644 +index 000000000000..a9e4328bd011 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvth.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16>) ++ ++define <4 x float> @lsx_vfcvth_s_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vfcvth_s_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcvth.s.h $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> %va) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float>) ++ ++define <2 x double> @lsx_vfcvth_d_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vfcvth_d_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcvth.d.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> %va) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvtl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvtl.ll +new file mode 100644 +index 000000000000..9a69964bb227 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvtl.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16>) ++ ++define <4 x float> @lsx_vfcvtl_s_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vfcvtl_s_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcvtl.s.h $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> %va) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float>) ++ ++define <2 x double> @lsx_vfcvtl_d_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vfcvtl_d_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcvtl.d.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> %va) ++ ret <2 x double> %res ++} +diff --git 
a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fdiv.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fdiv.ll +new file mode 100644 +index 000000000000..1ca8e5e2c0e9 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fdiv.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float>, <4 x float>) ++ ++define <4 x float> @lsx_vfdiv_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfdiv_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfdiv.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double>, <2 x double>) ++ ++define <2 x double> @lsx_vfdiv_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfdiv_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfdiv.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ffint.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ffint.ll +new file mode 100644 +index 000000000000..62fbcfa339cd +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ffint.ll +@@ -0,0 +1,86 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32>) ++ ++define <4 x float> @lsx_vffint_s_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vffint_s_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vffint.s.w $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> %va) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64>) ++ ++define <2 x double> @lsx_vffint_d_l(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vffint_d_l: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vffint.d.l $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> %va) ++ ret <2 x double> %res ++} ++ ++declare <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32>) ++ ++define <4 x float> @lsx_vffint_s_wu(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vffint_s_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vffint.s.wu $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> %va) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64>) ++ ++define <2 x double> @lsx_vffint_d_lu(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vffint_d_lu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vffint.d.lu $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> %va) ++ ret <2 x double> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32>) ++ ++define <2 x double> @lsx_vffintl_d_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vffintl_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vffintl.d.w $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> %va) ++ ret <2 x double> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32>) ++ 
++define <2 x double> @lsx_vffinth_d_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vffinth_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vffinth.d.w $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> %va) ++ ret <2 x double> %res ++} ++ ++declare <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64>, <2 x i64>) ++ ++define <4 x float> @lsx_vffint_s_l(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vffint_s_l: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vffint.s.l $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> %va, <2 x i64> %vb) ++ ret <4 x float> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-flogb.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-flogb.ll +new file mode 100644 +index 000000000000..d8382acc70ed +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-flogb.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float>) ++ ++define <4 x float> @lsx_vflogb_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vflogb_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vflogb.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> %va) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double>) ++ ++define <2 x double> @lsx_vflogb_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vflogb_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vflogb.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> %va) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmadd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmadd.ll +new file mode 100644 +index 000000000000..adbaf6c76b1b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmadd.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float>, <4 x float>, <4 x float>) ++ ++define <4 x float> @lsx_vfmadd_s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) nounwind { ++; CHECK-LABEL: lsx_vfmadd_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfmadd.s $vr0, $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double>, <2 x double>, <2 x double>) ++ ++define <2 x double> @lsx_vfmadd_d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) nounwind { ++; CHECK-LABEL: lsx_vfmadd_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfmadd.d $vr0, $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmax.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmax.ll +new file mode 100644 +index 000000000000..89f757c4e456 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmax.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: 
llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float>, <4 x float>) ++ ++define <4 x float> @lsx_vfmax_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfmax_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfmax.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double>, <2 x double>) ++ ++define <2 x double> @lsx_vfmax_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfmax_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfmax.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmaxa.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmaxa.ll +new file mode 100644 +index 000000000000..5662acc0b9a1 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmaxa.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float>, <4 x float>) ++ ++define <4 x float> @lsx_vfmaxa_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfmaxa_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfmaxa.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double>, <2 x double>) ++ ++define <2 x double> @lsx_vfmaxa_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfmaxa_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfmaxa.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmin.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmin.ll +new file mode 100644 +index 000000000000..0f844240277f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmin.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float>, <4 x float>) ++ ++define <4 x float> @lsx_vfmin_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfmin_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfmin.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double>, <2 x double>) ++ ++define <2 x double> @lsx_vfmin_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfmin_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfmin.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmina.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmina.ll +new file mode 100644 +index 000000000000..27f70b5fba32 +--- /dev/null 
++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmina.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float>, <4 x float>) ++ ++define <4 x float> @lsx_vfmina_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfmina_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfmina.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double>, <2 x double>) ++ ++define <2 x double> @lsx_vfmina_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfmina_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfmina.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmsub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmsub.ll +new file mode 100644 +index 000000000000..856ca9cadbd9 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmsub.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float>, <4 x float>, <4 x float>) ++ ++define <4 x float> @lsx_vfmsub_s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) nounwind { ++; CHECK-LABEL: lsx_vfmsub_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfmsub.s $vr0, $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double>, <2 x double>, <2 x double>) ++ ++define <2 x double> @lsx_vfmsub_d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) nounwind { ++; CHECK-LABEL: lsx_vfmsub_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfmsub.d $vr0, $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmul.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmul.ll +new file mode 100644 +index 000000000000..1e6c4c77d536 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmul.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float>, <4 x float>) ++ ++define <4 x float> @lsx_vfmul_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfmul_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfmul.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double>, <2 x double>) ++ ++define <2 x double> @lsx_vfmul_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfmul_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfmul.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> 
@llvm.loongarch.lsx.vfmul.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmadd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmadd.ll +new file mode 100644 +index 000000000000..e1a9ea78ef9d +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmadd.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float>, <4 x float>, <4 x float>) ++ ++define <4 x float> @lsx_vfnmadd_s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) nounwind { ++; CHECK-LABEL: lsx_vfnmadd_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfnmadd.s $vr0, $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double>, <2 x double>, <2 x double>) ++ ++define <2 x double> @lsx_vfnmadd_d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) nounwind { ++; CHECK-LABEL: lsx_vfnmadd_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfnmadd.d $vr0, $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmsub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmsub.ll +new file mode 100644 +index 000000000000..46db0f4a5061 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmsub.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float>, <4 x float>, <4 x float>) ++ ++define <4 x float> @lsx_vfnmsub_s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) nounwind { ++; CHECK-LABEL: lsx_vfnmsub_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfnmsub.s $vr0, $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double>, <2 x double>, <2 x double>) ++ ++define <2 x double> @lsx_vfnmsub_d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) nounwind { ++; CHECK-LABEL: lsx_vfnmsub_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfnmsub.d $vr0, $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecip.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecip.ll +new file mode 100644 +index 000000000000..669fde5912d4 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecip.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float>) ++ ++define <4 x float> @lsx_vfrecip_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vfrecip_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrecip.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> 
@llvm.loongarch.lsx.vfrecip.s(<4 x float> %va) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double>) ++ ++define <2 x double> @lsx_vfrecip_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vfrecip_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrecip.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> %va) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frint.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frint.ll +new file mode 100644 +index 000000000000..8d872fc72962 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frint.ll +@@ -0,0 +1,122 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float>) ++ ++define <4 x float> @lsx_vfrintrne_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vfrintrne_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrintrne.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> %va) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double>) ++ ++define <2 x double> @lsx_vfrintrne_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vfrintrne_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrintrne.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> %va) ++ ret <2 x double> %res ++} ++ ++declare <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float>) ++ ++define <4 x float> @lsx_vfrintrz_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vfrintrz_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrintrz.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> %va) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double>) ++ ++define <2 x double> @lsx_vfrintrz_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vfrintrz_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrintrz.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> %va) ++ ret <2 x double> %res ++} ++ ++declare <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float>) ++ ++define <4 x float> @lsx_vfrintrp_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vfrintrp_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrintrp.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> %va) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double>) ++ ++define <2 x double> @lsx_vfrintrp_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vfrintrp_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrintrp.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> %va) ++ ret <2 x double> %res ++} ++ ++declare <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float>) ++ ++define <4 x float> @lsx_vfrintrm_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vfrintrm_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrintrm.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> %va) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double>) ++ 
++define <2 x double> @lsx_vfrintrm_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vfrintrm_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrintrm.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> %va) ++ ret <2 x double> %res ++} ++ ++declare <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float>) ++ ++define <4 x float> @lsx_vfrint_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vfrint_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrint.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> %va) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double>) ++ ++define <2 x double> @lsx_vfrint_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vfrint_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrint.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> %va) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrt.ll +new file mode 100644 +index 000000000000..326d87308b0b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrt.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float>) ++ ++define <4 x float> @lsx_vfrsqrt_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vfrsqrt_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrsqrt.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> %va) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double>) ++ ++define <2 x double> @lsx_vfrsqrt_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vfrsqrt_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrsqrt.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> %va) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp.ll +new file mode 100644 +index 000000000000..5c072b194d4f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8>, <16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vfrstp_b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { ++; CHECK-LABEL: lsx_vfrstp_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrstp.b $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16>, <8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vfrstp_h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { ++; CHECK-LABEL: lsx_vfrstp_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrstp.h $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) ++ ret <8 x i16> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x 
i8> @lsx_vfrstpi_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vfrstpi_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrstpi.b $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> %va, <16 x i8> %vb, i32 1) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vfrstpi_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vfrstpi_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrstpi.h $vr0, $vr1, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> %va, <8 x i16> %vb, i32 31) ++ ret <8 x i16> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsqrt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsqrt.ll +new file mode 100644 +index 000000000000..55bffba9e99e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsqrt.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float>) ++ ++define <4 x float> @lsx_vfsqrt_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vfsqrt_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfsqrt.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> %va) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double>) ++ ++define <2 x double> @lsx_vfsqrt_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vfsqrt_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfsqrt.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> %va) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsub.ll +new file mode 100644 +index 000000000000..2beba4a70dc9 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsub.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float>, <4 x float>) ++ ++define <4 x float> @lsx_vfsub_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfsub_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfsub.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double>, <2 x double>) ++ ++define <2 x double> @lsx_vfsub_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfsub_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfsub.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ftint.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ftint.ll +new file mode 100644 +index 000000000000..2a494cd7fa87 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ftint.ll +@@ -0,0 +1,350 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x i32> 
@llvm.loongarch.lsx.vftintrne.w.s(<4 x float>) ++ ++define <4 x i32> @lsx_vftintrne_w_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrne_w_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrne.w.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> %va) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double>) ++ ++define <2 x i64> @lsx_vftintrne_l_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrne_l_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrne.l.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> %va) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float>) ++ ++define <4 x i32> @lsx_vftintrz_w_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrz_w_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrz.w.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> %va) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double>) ++ ++define <2 x i64> @lsx_vftintrz_l_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrz_l_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrz.l.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> %va) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float>) ++ ++define <4 x i32> @lsx_vftintrp_w_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrp_w_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrp.w.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> %va) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double>) ++ ++define <2 x i64> @lsx_vftintrp_l_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrp_l_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrp.l.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> %va) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float>) ++ ++define <4 x i32> @lsx_vftintrm_w_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrm_w_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrm.w.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> %va) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double>) ++ ++define <2 x i64> @lsx_vftintrm_l_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrm_l_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrm.l.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> %va) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float>) ++ ++define <4 x i32> @lsx_vftint_w_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vftint_w_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftint.w.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> %va) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double>) ++ ++define <2 x i64> @lsx_vftint_l_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vftint_l_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftint.l.d 
$vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> %va) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float>) ++ ++define <4 x i32> @lsx_vftintrz_wu_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrz_wu_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrz.wu.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> %va) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double>) ++ ++define <2 x i64> @lsx_vftintrz_lu_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrz_lu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrz.lu.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> %va) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float>) ++ ++define <4 x i32> @lsx_vftint_wu_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vftint_wu_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftint.wu.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> %va) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double>) ++ ++define <2 x i64> @lsx_vftint_lu_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vftint_lu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftint.lu.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> %va) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double>, <2 x double>) ++ ++define <4 x i32> @lsx_vftintrne_w_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vftintrne_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrne.w.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> %va, <2 x double> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double>, <2 x double>) ++ ++define <4 x i32> @lsx_vftintrz_w_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vftintrz_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrz.w.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> %va, <2 x double> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double>, <2 x double>) ++ ++define <4 x i32> @lsx_vftintrp_w_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vftintrp_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrp.w.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> %va, <2 x double> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double>, <2 x double>) ++ ++define <4 x i32> @lsx_vftintrm_w_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vftintrm_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrm.w.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> %va, <2 x double> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double>, <2 x double>) ++ ++define <4 x i32> @lsx_vftint_w_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vftint_w_d: 
++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftint.w.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> %va, <2 x double> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float>) ++ ++define <2 x i64> @lsx_vftintrnel_l_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrnel_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrnel.l.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> %va) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float>) ++ ++define <2 x i64> @lsx_vftintrneh_l_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrneh_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrneh.l.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> %va) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float>) ++ ++define <2 x i64> @lsx_vftintrzl_l_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrzl_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrzl.l.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> %va) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float>) ++ ++define <2 x i64> @lsx_vftintrzh_l_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrzh_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrzh.l.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> %va) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float>) ++ ++define <2 x i64> @lsx_vftintrpl_l_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrpl_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrpl.l.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> %va) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float>) ++ ++define <2 x i64> @lsx_vftintrph_l_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrph_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrph.l.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> %va) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float>) ++ ++define <2 x i64> @lsx_vftintrml_l_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrml_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrml.l.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> %va) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float>) ++ ++define <2 x i64> @lsx_vftintrmh_l_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrmh_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrmh.l.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> %va) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float>) ++ ++define <2 x i64> @lsx_vftintl_l_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vftintl_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintl.l.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> 
@llvm.loongarch.lsx.vftintl.l.s(<4 x float> %va) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float>) ++ ++define <2 x i64> @lsx_vftinth_l_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vftinth_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftinth.l.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> %va) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-haddw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-haddw.ll +new file mode 100644 +index 000000000000..05725582334a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-haddw.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vhaddw_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vhaddw_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vhaddw.h.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vhaddw_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vhaddw_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vhaddw.w.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vhaddw_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vhaddw_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vhaddw.d.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vhaddw_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vhaddw_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vhaddw.q.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vhaddw_hu_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vhaddw_hu_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vhaddw.hu.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vhaddw_wu_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vhaddw_wu_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vhaddw.wu.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vhaddw_du_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vhaddw_du_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vhaddw.du.wu $vr0, $vr0, $vr1 
++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vhaddw_qu_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vhaddw_qu_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vhaddw.qu.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-hsubw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-hsubw.ll +new file mode 100644 +index 000000000000..dd5815b2ea85 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-hsubw.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vhsubw_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vhsubw_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vhsubw.h.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vhsubw_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vhsubw_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vhsubw.w.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vhsubw_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vhsubw_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vhsubw.d.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vhsubw_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vhsubw_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vhsubw.q.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vhsubw_hu_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vhsubw_hu_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vhsubw.hu.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vhsubw_wu_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vhsubw_wu_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vhsubw.wu.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vhsubw_du_wu(<4 x i32> %va, <4 x i32> %vb) 
nounwind { ++; CHECK-LABEL: lsx_vhsubw_du_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vhsubw.du.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vhsubw_qu_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vhsubw_qu_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vhsubw.qu.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ilv.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ilv.ll +new file mode 100644 +index 000000000000..77b0b3484df8 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ilv.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vilvl_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vilvl_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vilvl_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vilvl_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vilvl_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vilvl_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vilvl_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vilvl_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vilvl.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vilvh_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vilvh_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vilvh.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vilvh_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vilvh_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vilvh.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vilvh_w(<4 x i32> %va, <4 x i32> %vb) nounwind { 
++; CHECK-LABEL: lsx_vilvh_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vilvh.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vilvh_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vilvh_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vilvh.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr.ll +new file mode 100644 +index 000000000000..61d2cbd28066 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr.ll +@@ -0,0 +1,54 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8>, i32, i32) ++ ++define <16 x i8> @lsx_vinsgr2vr_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vinsgr2vr_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: ori $a0, $zero, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> %va, i32 1, i32 15) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16>, i32, i32) ++ ++define <8 x i16> @lsx_vinsgr2vr_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vinsgr2vr_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: ori $a0, $zero, 1 ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 7 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> %va, i32 1, i32 7) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32>, i32, i32) ++ ++define <4 x i32> @lsx_vinsgr2vr_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vinsgr2vr_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: ori $a0, $zero, 1 ++; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 3 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> %va, i32 1, i32 3) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64>, i64, i32) ++ ++define <2 x i64> @lsx_vinsgr2vr_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vinsgr2vr_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: ori $a0, $zero, 1 ++; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> %va, i64 1, i32 1) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld.ll +new file mode 100644 +index 000000000000..b9e2ff8088d8 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vld(i8*, i32) ++ ++define <16 x i8> @lsx_vld(i8* %p) nounwind { ++; CHECK-LABEL: lsx_vld: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vld(i8* %p, i32 1) ++ ret <16 x i8> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vldx(i8*, i64) ++ ++define <16 x i8> @lsx_vldx(i8* %p, i64 %b) 
nounwind { ++; CHECK-LABEL: lsx_vldx: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vldx $vr0, $a0, $a1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vldx(i8* %p, i64 %b) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi.ll +new file mode 100644 +index 000000000000..ace910b54d9a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi.ll +@@ -0,0 +1,62 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <2 x i64> @llvm.loongarch.lsx.vldi(i32) ++ ++define <2 x i64> @lsx_vldi() nounwind { ++; CHECK-LABEL: lsx_vldi: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vldi $vr0, 4095 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vldi(i32 4095) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32) ++ ++define <16 x i8> @lsx_vrepli_b() nounwind { ++; CHECK-LABEL: lsx_vrepli_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vrepli.b $vr0, 511 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 511) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32) ++ ++define <8 x i16> @lsx_vrepli_h() nounwind { ++; CHECK-LABEL: lsx_vrepli_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vrepli.h $vr0, 511 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 511) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32) ++ ++define <4 x i32> @lsx_vrepli_w() nounwind { ++; CHECK-LABEL: lsx_vrepli_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vrepli.w $vr0, 511 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 511) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32) ++ ++define <2 x i64> @lsx_vrepli_d() nounwind { ++; CHECK-LABEL: lsx_vrepli_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vrepli.d $vr0, 511 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 511) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl.ll +new file mode 100644 +index 000000000000..1a9cf3d3a766 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8*, i32) ++ ++define <16 x i8> @lsx_vldrepl_b(i8* %p, i32 %b) nounwind { ++; CHECK-LABEL: lsx_vldrepl_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vldrepl.b $vr0, $a0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8* %p, i32 1) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8*, i32) ++ ++define <8 x i16> @lsx_vldrepl_h(i8* %p, i32 %b) nounwind { ++; CHECK-LABEL: lsx_vldrepl_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vldrepl.h $vr0, $a0, 2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8* %p, i32 2) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8*, i32) ++ ++define <4 x i32> @lsx_vldrepl_w(i8* %p, i32 %b) nounwind { ++; CHECK-LABEL: lsx_vldrepl_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vldrepl.w $vr0, $a0, 4 
++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8* %p, i32 4) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8*, i32) ++ ++define <2 x i64> @lsx_vldrepl_d(i8* %p, i32 %b) nounwind { ++; CHECK-LABEL: lsx_vldrepl_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vldrepl.d $vr0, $a0, 8 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8* %p, i32 8) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-madd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-madd.ll +new file mode 100644 +index 000000000000..89503724fd73 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-madd.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8>, <16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vmadd_b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { ++; CHECK-LABEL: lsx_vmadd_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmadd.b $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16>, <8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vmadd_h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { ++; CHECK-LABEL: lsx_vmadd_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmadd.h $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32>, <4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vmadd_w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { ++; CHECK-LABEL: lsx_vmadd_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmadd.w $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64>, <2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmadd_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { ++; CHECK-LABEL: lsx_vmadd_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmadd.d $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-maddw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-maddw.ll +new file mode 100644 +index 000000000000..1e3ab25a5fcf +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-maddw.ll +@@ -0,0 +1,290 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16>, <16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vmaddwev_h_b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwev_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwev.h.b $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32>, <8 x i16>, <8 x i16>) 
++ ++define <4 x i32> @lsx_vmaddwev_w_h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwev_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwev.w.h $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64>, <4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vmaddwev_d_w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwev_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwev.d.w $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64>, <2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmaddwev_q_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwev_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwev.q.d $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16>, <16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vmaddwev_h_bu(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwev_h_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwev.h.bu $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32>, <8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vmaddwev_w_hu(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwev_w_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwev.w.hu $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64>, <4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vmaddwev_d_wu(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwev_d_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwev.d.wu $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64>, <2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmaddwev_q_du(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwev_q_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwev.q.du $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16>, <16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vmaddwev_h_bu_b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwev_h_bu_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwev.h.bu.b $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) ++ ret <8 
x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32>, <8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vmaddwev_w_hu_h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwev_w_hu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwev.w.hu.h $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64>, <4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vmaddwev_d_wu_w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwev_d_wu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwev.d.wu.w $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64>, <2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmaddwev_q_du_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwev_q_du_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwev.q.du.d $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16>, <16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vmaddwod_h_b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwod_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwod.h.b $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32>, <8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vmaddwod_w_h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwod_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwod.w.h $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64>, <4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vmaddwod_d_w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwod_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwod.d.w $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64>, <2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmaddwod_q_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwod_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwod.q.d $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16>, <16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vmaddwod_h_bu(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwod_h_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwod.h.bu $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret 
++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32>, <8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vmaddwod_w_hu(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwod_w_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwod.w.hu $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64>, <4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vmaddwod_d_wu(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwod_d_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwod.d.wu $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64>, <2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmaddwod_q_du(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwod_q_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwod.q.du $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16>, <16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vmaddwod_h_bu_b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwod_h_bu_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwod.h.bu.b $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32>, <8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vmaddwod_w_hu_h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwod_w_hu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwod.w.hu.h $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64>, <4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vmaddwod_d_wu_w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwod_d_wu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwod.d.wu.w $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64>, <2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmaddwod_q_du_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwod_q_du_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwod.q.du.d $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max.ll +new file mode 100644 +index 000000000000..4dd289cf6ed7 
+--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max.ll +@@ -0,0 +1,194 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vmax_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vmax_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vmax_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vmax_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmax.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vmax_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vmax_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmax.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmax_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vmax_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmax.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vmaxi_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vmaxi_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaxi.b $vr0, $vr0, -16 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> %va, i32 -16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vmaxi_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vmaxi_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaxi.h $vr0, $vr0, -16 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> %va, i32 -16) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vmaxi_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vmaxi_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaxi.w $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> %va, i32 15) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vmaxi_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vmaxi_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaxi.d $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> %va, i32 15) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vmax_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vmax_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> %va, <16 x i8> 
%vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vmax_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vmax_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmax.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vmax_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vmax_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmax.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmax_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vmax_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmax.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vmaxi_bu(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vmaxi_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaxi.bu $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> %va, i32 1) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vmaxi_hu(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vmaxi_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaxi.hu $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> %va, i32 1) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vmaxi_wu(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vmaxi_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaxi.wu $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> %va, i32 31) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vmaxi_du(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vmaxi_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaxi.du $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> %va, i32 31) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min.ll +new file mode 100644 +index 000000000000..aa12a5ead6a3 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min.ll +@@ -0,0 +1,194 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vmin_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vmin_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vmin_h(<8 x i16> %va, <8 x 
i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vmin_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmin.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vmin_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vmin_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmin.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmin_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vmin_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmin.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vmini_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vmini_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmini.b $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> %va, i32 15) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vmini_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vmini_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmini.h $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> %va, i32 15) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vmini_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vmini_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmini.w $vr0, $vr0, -16 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> %va, i32 -16) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vmini_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vmini_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmini.d $vr0, $vr0, -16 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> %va, i32 -16) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vmin_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vmin_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmin.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vmin_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vmin_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmin.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vmin_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vmin_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmin.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> 
@llvm.loongarch.lsx.vmin.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmin_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vmin_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmin.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vmini_bu(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vmini_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmini.bu $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> %va, i32 31) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vmini_hu(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vmini_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmini.hu $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> %va, i32 31) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vmini_wu(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vmini_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmini.wu $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> %va, i32 31) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vmini_du(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vmini_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmini.du $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> %va, i32 31) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mod.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mod.ll +new file mode 100644 +index 000000000000..6b3dc6865584 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mod.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vmod_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vmod_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmod.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vmod_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vmod_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmod.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vmod_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vmod_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmod.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64>, <2 x i64>) ++ 
++define <2 x i64> @lsx_vmod_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vmod_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmod.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vmod_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vmod_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmod.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vmod_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vmod_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmod.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vmod_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vmod_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmod.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmod_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vmod_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmod.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskgez.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskgez.ll +new file mode 100644 +index 000000000000..3ecd777aee67 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskgez.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8>) ++ ++define <16 x i8> @lsx_vmskgez_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vmskgez_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmskgez.b $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> %va) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskltz.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskltz.ll +new file mode 100644 +index 000000000000..be00c76137c7 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskltz.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8>) ++ ++define <16 x i8> @lsx_vmskltz_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vmskltz_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmskltz.b $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> %va) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16>) ++ ++define <8 x i16> @lsx_vmskltz_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vmskltz_h: ++; 
CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmskltz.h $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> %va) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32>) ++ ++define <4 x i32> @lsx_vmskltz_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vmskltz_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmskltz.w $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> %va) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64>) ++ ++define <2 x i64> @lsx_vmskltz_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vmskltz_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmskltz.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> %va) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msknz.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msknz.ll +new file mode 100644 +index 000000000000..02f1752f7190 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msknz.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8>) ++ ++define <16 x i8> @lsx_vmsknz_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vmsknz_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmsknz.b $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> %va) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msub.ll +new file mode 100644 +index 000000000000..98684e10c78e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msub.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8>, <16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vmsub_b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { ++; CHECK-LABEL: lsx_vmsub_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmsub.b $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16>, <8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vmsub_h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { ++; CHECK-LABEL: lsx_vmsub_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmsub.h $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32>, <4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vmsub_w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { ++; CHECK-LABEL: lsx_vmsub_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmsub.w $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64>, <2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmsub_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { ++; CHECK-LABEL: lsx_vmsub_d: ++; CHECK: 
# %bb.0: # %entry ++; CHECK-NEXT: vmsub.d $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-muh.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-muh.ll +new file mode 100644 +index 000000000000..a4deb8f8f823 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-muh.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vmuh_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vmuh_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmuh.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vmuh_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vmuh_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmuh.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vmuh_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vmuh_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmuh.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmuh_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vmuh_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmuh.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vmuh_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vmuh_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmuh.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vmuh_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vmuh_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmuh.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vmuh_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vmuh_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmuh.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmuh_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vmuh_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmuh.du $vr0, $vr0, 
$vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mul.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mul.ll +new file mode 100644 +index 000000000000..aca60d1663b7 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mul.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vmul_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vmul_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmul.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vmul_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vmul_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmul.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vmul_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vmul_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmul.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmul_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vmul_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmul.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mulw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mulw.ll +new file mode 100644 +index 000000000000..eb55c1f809e3 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mulw.ll +@@ -0,0 +1,290 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vmulwev_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwev_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwev.h.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vmulwev_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwev_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwev.w.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vmulwev_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwev_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwev.d.w 
$vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmulwev_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwev_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwev.q.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vmulwev_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwev_h_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwev.h.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vmulwev_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwev_w_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwev.w.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vmulwev_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwev_d_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwev.d.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmulwev_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwev_q_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwev.q.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vmulwev_h_bu_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwev_h_bu_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwev.h.bu.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vmulwev_w_hu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwev_w_hu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwev.w.hu.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vmulwev_d_wu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwev_d_wu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwev.d.wu.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64>, <2 x 
i64>) ++ ++define <2 x i64> @lsx_vmulwev_q_du_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwev_q_du_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwev.q.du.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vmulwod_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwod_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwod.h.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vmulwod_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwod_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwod.w.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vmulwod_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwod_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwod.d.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmulwod_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwod_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwod.q.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vmulwod_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwod_h_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwod.h.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vmulwod_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwod_w_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwod.w.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vmulwod_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwod_d_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwod.d.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmulwod_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwod_q_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwod.q.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> 
@llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vmulwod_h_bu_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwod_h_bu_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwod.h.bu.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vmulwod_w_hu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwod_w_hu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwod.w.hu.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vmulwod_d_wu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwod_d_wu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwod.d.wu.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmulwod_q_du_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwod_q_du_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwod.q.du.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-neg.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-neg.ll +new file mode 100644 +index 000000000000..43c6e9757614 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-neg.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8>) ++ ++define <16 x i8> @lsx_vneg_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vneg_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vneg.b $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> %va) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16>) ++ ++define <8 x i16> @lsx_vneg_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vneg_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vneg.h $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> %va) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32>) ++ ++define <4 x i32> @lsx_vneg_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vneg_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vneg.w $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> %va) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64>) ++ ++define <2 x i64> @lsx_vneg_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vneg_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vneg.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> %va) ++ ret <2 x i64> %res ++} +diff --git 
a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nor.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nor.ll +new file mode 100644 +index 000000000000..16619225f2d1 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nor.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vnor_v(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vnor_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vnor.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori.ll +new file mode 100644 +index 000000000000..c2388a1e0da3 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vnori_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vnori_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vnori.b $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> %va, i32 1) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-or.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-or.ll +new file mode 100644 +index 000000000000..ab557003d150 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-or.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vor_v(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vor_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori.ll +new file mode 100644 +index 000000000000..85c0f432c54a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vori_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vori_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vori.b $vr0, $vr0, 3 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> %va, i32 3) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-orn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-orn.ll +new file mode 100644 +index 000000000000..4528628e02c3 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-orn.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8>, <16 x 
i8>) ++ ++define <16 x i8> @lsx_vorn_v(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vorn_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vorn.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pack.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pack.ll +new file mode 100644 +index 000000000000..70a3620d1757 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pack.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vpackev_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vpackev_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpackev.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vpackev_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vpackev_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpackev.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vpackev_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vpackev_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpackev.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vpackev_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vpackev_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpackev.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vpackod_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vpackod_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpackod.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vpackod_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vpackod_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpackod.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vpackod_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vpackod_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpackod.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> 
@llvm.loongarch.lsx.vpackod.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vpackod_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vpackod_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpackod.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pcnt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pcnt.ll +new file mode 100644 +index 000000000000..431b270ab0a1 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pcnt.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8>) ++ ++define <16 x i8> @lsx_vpcnt_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vpcnt_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpcnt.b $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> %va) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16>) ++ ++define <8 x i16> @lsx_vpcnt_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vpcnt_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpcnt.h $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> %va) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32>) ++ ++define <4 x i32> @lsx_vpcnt_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vpcnt_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpcnt.w $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> %va) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64>) ++ ++define <2 x i64> @lsx_vpcnt_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vpcnt_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpcnt.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> %va) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi.ll +new file mode 100644 +index 000000000000..b8367d98caf6 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vpermi_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vpermi_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpermi.w $vr0, $vr1, 255 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> %va, <4 x i32> %vb, i32 255) ++ ret <4 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pick.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pick.ll +new file mode 100644 +index 000000000000..4ebf29e1409c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pick.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vpickev_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: 
lsx_vpickev_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpickev.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vpickev_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vpickev_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpickev.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vpickev_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vpickev_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpickev.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vpickev_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vpickev_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpickev.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vpickod_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vpickod_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpickod.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vpickod_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vpickod_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpickod.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vpickod_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vpickod_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpickod.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vpickod_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vpickod_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpickod.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr.ll +new file mode 100644 +index 000000000000..ed56d30ce3c4 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8>, i32) ++ ++define i32 @lsx_vpickve2gr_b(<16 x i8> %va) nounwind 
{ ++; CHECK-LABEL: lsx_vpickve2gr_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> %va, i32 15) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16>, i32) ++ ++define i32 @lsx_vpickve2gr_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vpickve2gr_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> %va, i32 7) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32>, i32) ++ ++define i32 @lsx_vpickve2gr_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vpickve2gr_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 3 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> %va, i32 3) ++ ret i32 %res ++} ++ ++declare i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64>, i32) ++ ++define i64 @lsx_vpickve2gr_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vpickve2gr_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> %va, i32 1) ++ ret i64 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8>, i32) ++ ++define i32 @lsx_vpickve2gr_bu(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vpickve2gr_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpickve2gr.bu $a0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> %va, i32 15) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16>, i32) ++ ++define i32 @lsx_vpickve2gr_hu(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vpickve2gr_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 7 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> %va, i32 7) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32>, i32) ++ ++define i32 @lsx_vpickve2gr_wu(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vpickve2gr_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpickve2gr.wu $a0, $vr0, 3 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> %va, i32 3) ++ ret i32 %res ++} ++ ++declare i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64>, i32) ++ ++define i64 @lsx_vpickve2gr_du(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vpickve2gr_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpickve2gr.du $a0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> %va, i32 1) ++ ret i64 %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replgr2vr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replgr2vr.ll +new file mode 100644 +index 000000000000..091f1c98c228 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replgr2vr.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32) ++ ++define <16 x i8> @lsx_vreplgr2vr_b(i32 %a) nounwind { ++; CHECK-LABEL: lsx_vreplgr2vr_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vreplgr2vr.b $vr0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 %a) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> 
@llvm.loongarch.lsx.vreplgr2vr.h(i32) ++ ++define <8 x i16> @lsx_vreplgr2vr_h(i32 %a) nounwind { ++; CHECK-LABEL: lsx_vreplgr2vr_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vreplgr2vr.h $vr0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 %a) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32) ++ ++define <4 x i32> @lsx_vreplgr2vr_w(i32 %a) nounwind { ++; CHECK-LABEL: lsx_vreplgr2vr_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vreplgr2vr.w $vr0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 %a) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64) ++ ++define <2 x i64> @lsx_vreplgr2vr_d(i64 %a) nounwind { ++; CHECK-LABEL: lsx_vreplgr2vr_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vreplgr2vr.d $vr0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 %a) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replve.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replve.ll +new file mode 100644 +index 000000000000..3ba184dad052 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replve.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vreplve_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK-LABEL: lsx_vreplve_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vreplve.b $vr0, $vr0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vreplve_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK-LABEL: lsx_vreplve_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vreplve.h $vr0, $vr0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vreplve_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK-LABEL: lsx_vreplve_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vreplve.w $vr0, $vr0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vreplve_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK-LABEL: lsx_vreplve_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vreplve.d $vr0, $vr0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei.ll +new file mode 100644 +index 000000000000..9b8af1878cb8 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vreplvei_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vreplvei_b: ++; CHECK: # %bb.0: # %entry ++; 
CHECK-NEXT: vreplvei.b $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> %va, i32 15) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vreplvei_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vreplvei_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vreplvei.h $vr0, $vr0, 7 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> %va, i32 7) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vreplvei_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vreplvei_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> %va, i32 3) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vreplvei_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vreplvei_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vreplvei.d $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> %va, i32 1) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr.ll +new file mode 100644 +index 000000000000..df8650677147 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vrotr_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vrotr_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vrotr.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vrotr_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vrotr_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vrotr.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vrotr_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vrotr_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vrotr.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vrotr_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vrotr_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vrotr.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vrotri_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vrotri_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vrotri.b $vr0, $vr0, 7 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> 
@llvm.loongarch.lsx.vrotri.b(<16 x i8> %va, i32 7) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vrotri_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vrotri_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vrotri.h $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> %va, i32 15) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vrotri_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vrotri_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vrotri.w $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> %va, i32 31) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vrotri_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vrotri_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vrotri.d $vr0, $vr0, 63 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> %va, i32 63) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sadd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sadd.ll +new file mode 100644 +index 000000000000..a54f955766df +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sadd.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vsadd_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsadd_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsadd.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vsadd_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsadd_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsadd.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vsadd_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsadd_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsadd.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vsadd_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsadd_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsadd.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vsadd_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsadd_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsadd.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> 
@llvm.loongarch.lsx.vsadd.hu(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vsadd_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsadd_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsadd.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vsadd_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsadd_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsadd.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vsadd_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsadd_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsadd.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat.ll +new file mode 100644 +index 000000000000..4286842a63b9 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsat_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vsat_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsat.b $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> %va, i32 1) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsat_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vsat_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsat.h $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> %va, i32 1) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsat_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vsat_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsat.w $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> %va, i32 1) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsat_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vsat_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsat.d $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> %va, i32 1) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsat_bu(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vsat_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsat.bu $vr0, $vr0, 7 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> %va, i32 7) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsat_hu(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vsat_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsat.hu $vr0, $vr0, 15 ++; 
CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> %va, i32 15) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsat_wu(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vsat_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsat.wu $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> %va, i32 31) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsat_du(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vsat_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsat.du $vr0, $vr0, 63 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> %va, i32 63) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq.ll +new file mode 100644 +index 000000000000..3cb4acd82439 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vseq_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vseq_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vseq.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vseq_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vseq_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vseq.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vseq_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vseq_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vseq.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vseq_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vseq_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vseq.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vseqi_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vseqi_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vseqi.b $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> %va, i32 15) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vseqi_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vseqi_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vseqi.h $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> %va, i32 15) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32>, 
i32) ++ ++define <4 x i32> @lsx_vseqi_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vseqi_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vseqi.w $vr0, $vr0, -16 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> %va, i32 -16) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vseqi_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vseqi_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vseqi.d $vr0, $vr0, -16 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> %va, i32 -16) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-set.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-set.ll +new file mode 100644 +index 000000000000..3188fb4e2c2e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-set.ll +@@ -0,0 +1,38 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare i32 @llvm.loongarch.lsx.bz.v(<16 x i8>) ++ ++define i32 @lsx_bz_v(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_bz_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vseteqz.v $fcc0, $vr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB0_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB0_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> %va) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.bnz.v(<16 x i8>) ++ ++define i32 @lsx_bnz_v(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_bnz_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsetnez.v $fcc0, $vr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB1_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB1_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> %va) ++ ret i32 %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setallnez.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setallnez.ll +new file mode 100644 +index 000000000000..22e01922e87b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setallnez.ll +@@ -0,0 +1,74 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare i32 @llvm.loongarch.lsx.bnz.b(<16 x i8>) ++ ++define i32 @lsx_bnz_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_bnz_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsetallnez.b $fcc0, $vr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB0_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB0_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> %va) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.bnz.h(<8 x i16>) ++ ++define i32 @lsx_bnz_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_bnz_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsetallnez.h $fcc0, $vr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB1_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB1_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> %va) ++ ret i32 %res ++} ++ ++declare i32 
@llvm.loongarch.lsx.bnz.w(<4 x i32>) ++ ++define i32 @lsx_bnz_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_bnz_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsetallnez.w $fcc0, $vr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB2_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB2_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> %va) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.bnz.d(<2 x i64>) ++ ++define i32 @lsx_bnz_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_bnz_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsetallnez.d $fcc0, $vr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB3_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB3_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> %va) ++ ret i32 %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setanyeqz.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setanyeqz.ll +new file mode 100644 +index 000000000000..96c79c10e468 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setanyeqz.ll +@@ -0,0 +1,74 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare i32 @llvm.loongarch.lsx.bz.b(<16 x i8>) ++ ++define i32 @lsx_bz_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_bz_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsetanyeqz.b $fcc0, $vr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB0_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB0_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> %va) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.bz.h(<8 x i16>) ++ ++define i32 @lsx_bz_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_bz_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsetanyeqz.h $fcc0, $vr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB1_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB1_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> %va) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.bz.w(<4 x i32>) ++ ++define i32 @lsx_bz_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_bz_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsetanyeqz.w $fcc0, $vr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB2_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB2_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> %va) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.bz.d(<2 x i64>) ++ ++define i32 @lsx_bz_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_bz_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsetanyeqz.d $fcc0, $vr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB3_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB3_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> %va) ++ ret i32 %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf.ll 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf.ll +new file mode 100644 +index 000000000000..f5d516521e45 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8>, <16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vshuf_b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { ++; CHECK-LABEL: lsx_vshuf_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16>, <8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vshuf_h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { ++; CHECK-LABEL: lsx_vshuf_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vshuf.h $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32>, <4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vshuf_w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { ++; CHECK-LABEL: lsx_vshuf_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vshuf.w $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64>, <2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vshuf_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { ++; CHECK-LABEL: lsx_vshuf_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vshuf.d $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i.ll +new file mode 100644 +index 000000000000..1ad5f2af5591 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vshuf4i_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vshuf4i_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 255 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> %va, i32 255) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vshuf4i_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vshuf4i_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vshuf4i.h $vr0, $vr0, 255 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> %va, i32 255) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vshuf4i_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vshuf4i_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 255 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> %va, 
i32 255) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vshuf4i_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vshuf4i_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 255 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> %va, <2 x i64> %vb, i32 255) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-signcov.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-signcov.ll +new file mode 100644 +index 000000000000..3997b0cc995c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-signcov.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vsigncov_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsigncov_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsigncov.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vsigncov_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsigncov_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsigncov.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vsigncov_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsigncov_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsigncov.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vsigncov_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsigncov_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsigncov.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle.ll +new file mode 100644 +index 000000000000..5a9d5f06e63f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle.ll +@@ -0,0 +1,194 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vsle_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsle_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsle.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vsle_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsle_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsle.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 
x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vsle_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsle_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsle.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vsle_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsle_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsle.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vslei_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vslei_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslei.b $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> %va, i32 15) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vslei_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vslei_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslei.h $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> %va, i32 15) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vslei_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vslei_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslei.w $vr0, $vr0, -16 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> %va, i32 -16) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vslei_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vslei_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslei.d $vr0, $vr0, -16 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> %va, i32 -16) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vsle_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsle_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsle.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vsle_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsle_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsle.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vsle_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsle_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsle.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> 
@lsx_vsle_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsle_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsle.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vslei_bu(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vslei_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslei.bu $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> %va, i32 1) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vslei_hu(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vslei_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslei.hu $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> %va, i32 1) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vslei_wu(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vslei_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslei.wu $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> %va, i32 31) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vslei_du(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vslei_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslei.du $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> %va, i32 31) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll.ll +new file mode 100644 +index 000000000000..7bc20af41f17 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vsll_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsll_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsll.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vsll_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsll_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsll.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vsll_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsll_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsll.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vsll_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsll_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsll.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret 
++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vslli_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vslli_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslli.b $vr0, $vr0, 7 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> %va, i32 7) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vslli_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vslli_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslli.h $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> %va, i32 15) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vslli_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vslli_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslli.w $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> %va, i32 31) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vslli_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vslli_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslli.d $vr0, $vr0, 63 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> %va, i32 63) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil.ll +new file mode 100644 +index 000000000000..29ab70da1ced +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil.ll +@@ -0,0 +1,74 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8>, i32) ++ ++define <8 x i16> @lsx_vsllwil_h_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vsllwil_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsllwil.h.b $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> %va, i32 1) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16>, i32) ++ ++define <4 x i32> @lsx_vsllwil_w_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vsllwil_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsllwil.w.h $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> %va, i32 1) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32>, i32) ++ ++define <2 x i64> @lsx_vsllwil_d_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vsllwil_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsllwil.d.w $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> %va, i32 1) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8>, i32) ++ ++define <8 x i16> @lsx_vsllwil_hu_bu(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vsllwil_hu_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsllwil.hu.bu $vr0, $vr0, 7 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> %va, i32 7) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16>, i32) ++ ++define 
<4 x i32> @lsx_vsllwil_wu_hu(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vsllwil_wu_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsllwil.wu.hu $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> %va, i32 15) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32>, i32) ++ ++define <2 x i64> @lsx_vsllwil_du_wu(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vsllwil_du_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsllwil.du.wu $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> %va, i32 31) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt.ll +new file mode 100644 +index 000000000000..18683e9dc46f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt.ll +@@ -0,0 +1,194 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vslt_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vslt_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslt.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vslt_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vslt_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslt.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vslt_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vslt_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslt.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vslt_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vslt_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslt.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vslti_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vslti_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslti.b $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> %va, i32 15) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vslti_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vslti_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslti.h $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> %va, i32 15) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vslti_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vslti_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslti.w 
$vr0, $vr0, -16 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> %va, i32 -16) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vslti_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vslti_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslti.d $vr0, $vr0, -16 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> %va, i32 -16) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vslt_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vslt_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslt.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vslt_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vslt_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslt.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vslt_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vslt_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslt.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vslt_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vslt_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslt.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vslti_bu(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vslti_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslti.bu $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> %va, i32 1) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vslti_hu(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vslti_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslti.hu $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> %va, i32 1) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vslti_wu(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vslti_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslti.wu $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> %va, i32 31) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vslti_du(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vslti_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslti.du $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> %va, i32 31) ++ ret <2 x i64> %res ++} +diff --git 
a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra.ll +new file mode 100644 +index 000000000000..e85c8464c18e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vsra_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsra_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsra.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vsra_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsra_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsra.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vsra_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsra_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsra.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vsra_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsra_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsra.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrai_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vsrai_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrai.b $vr0, $vr0, 7 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> %va, i32 7) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrai_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vsrai_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrai.h $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> %va, i32 15) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrai_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vsrai_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrai.w $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> %va, i32 31) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrai_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vsrai_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrai.d $vr0, $vr0, 63 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> %va, i32 63) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sran.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sran.ll +new file mode 100644 +index 000000000000..4ffe5a704c2c +--- /dev/null ++++ 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sran.ll +@@ -0,0 +1,38 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16>, <8 x i16>) ++ ++define <16 x i8> @lsx_vsran_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsran_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsran.b.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32>, <4 x i32>) ++ ++define <8 x i16> @lsx_vsran_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsran_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsran.h.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64>, <2 x i64>) ++ ++define <4 x i32> @lsx_vsran_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsran_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsran.w.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <4 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani.ll +new file mode 100644 +index 000000000000..717c641616c8 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrani_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrani_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrani.b.h $vr0, $vr1, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 15) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrani_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrani_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrani.h.w $vr0, $vr1, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 31) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrani_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrani_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrani.w.d $vr0, $vr1, 63 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 63) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrani_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrani_d_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrani.d.q $vr0, $vr1, 127 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 127) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar.ll 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar.ll +new file mode 100644 +index 000000000000..8b52b7ac9631 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vsrar_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrar_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrar.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vsrar_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrar_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrar.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vsrar_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrar_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrar.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vsrar_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrar_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrar.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrari_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vsrari_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrari.b $vr0, $vr0, 7 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> %va, i32 7) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrari_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vsrari_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrari.h $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> %va, i32 15) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrari_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vsrari_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrari.w $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> %va, i32 31) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrari_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vsrari_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrari.d $vr0, $vr0, 63 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> %va, i32 63) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarn.ll +new file mode 100644 +index 000000000000..d4cdfb5359ea +--- /dev/null ++++ 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarn.ll +@@ -0,0 +1,38 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16>, <8 x i16>) ++ ++define <16 x i8> @lsx_vsrarn_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrarn_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrarn.b.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32>, <4 x i32>) ++ ++define <8 x i16> @lsx_vsrarn_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrarn_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrarn.h.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64>, <2 x i64>) ++ ++define <4 x i32> @lsx_vsrarn_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrarn_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrarn.w.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <4 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni.ll +new file mode 100644 +index 000000000000..2253e88372fc +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrarni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrarni_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrarni.b.h $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrarni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrarni_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrarni.h.w $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrarni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrarni_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrarni.w.d $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrarni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrarni_d_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrarni.d.q $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1) ++ ret <2 x i64> %res ++} +diff --git 
a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl.ll +new file mode 100644 +index 000000000000..1cddd9622233 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vsrl_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrl_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrl.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vsrl_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrl_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrl.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vsrl_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrl_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrl.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vsrl_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrl_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrl.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrli_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vsrli_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrli.b $vr0, $vr0, 7 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> %va, i32 7) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrli_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vsrli_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrli.h $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> %va, i32 15) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrli_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vsrli_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrli.w $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> %va, i32 31) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrli_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vsrli_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrli.d $vr0, $vr0, 63 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> %va, i32 63) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srln.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srln.ll +new file mode 100644 +index 000000000000..1c9b23243ffb +--- /dev/null ++++ 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srln.ll +@@ -0,0 +1,38 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16>, <8 x i16>) ++ ++define <16 x i8> @lsx_vsrln_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrln_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrln.b.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32>, <4 x i32>) ++ ++define <8 x i16> @lsx_vsrln_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrln_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrln.h.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64>, <2 x i64>) ++ ++define <4 x i32> @lsx_vsrln_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrln_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrln.w.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <4 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni.ll +new file mode 100644 +index 000000000000..6e523efa1824 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrlni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrlni_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlni.b.h $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrlni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrlni_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlni.h.w $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrlni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrlni_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlni.w.d $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrlni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrlni_d_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlni.d.q $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr.ll 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr.ll +new file mode 100644 +index 000000000000..51638fa1a47f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vsrlr_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrlr_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlr.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vsrlr_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrlr_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlr.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vsrlr_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrlr_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlr.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vsrlr_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrlr_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlr.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrlri_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vsrlri_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlri.b $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> %va, i32 1) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrlri_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vsrlri_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlri.h $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> %va, i32 1) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrlri_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vsrlri_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlri.w $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> %va, i32 1) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrlri_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vsrlri_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlri.d $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> %va, i32 1) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrn.ll +new file mode 100644 +index 000000000000..893e51396241 +--- /dev/null ++++ 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrn.ll +@@ -0,0 +1,38 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16>, <8 x i16>) ++ ++define <16 x i8> @lsx_vsrlrn_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrlrn_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlrn.b.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32>, <4 x i32>) ++ ++define <8 x i16> @lsx_vsrlrn_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrlrn_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlrn.h.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64>, <2 x i64>) ++ ++define <4 x i32> @lsx_vsrlrn_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrlrn_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlrn.w.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <4 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni.ll +new file mode 100644 +index 000000000000..d1ea450d2237 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrlrni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrlrni_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlrni.b.h $vr0, $vr1, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 15) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrlrni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrlrni_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlrni.h.w $vr0, $vr1, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 31) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrlrni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrlrni_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlrni.w.d $vr0, $vr1, 63 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 63) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrlrni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrlrni_d_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlrni.d.q $vr0, $vr1, 127 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 127) ++ ret <2 x i64> %res ++} +diff --git 
a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssran.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssran.ll +new file mode 100644 +index 000000000000..cecccbb730c9 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssran.ll +@@ -0,0 +1,74 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16>, <8 x i16>) ++ ++define <16 x i8> @lsx_vssran_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssran_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssran.b.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32>, <4 x i32>) ++ ++define <8 x i16> @lsx_vssran_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssran_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssran.h.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64>, <2 x i64>) ++ ++define <4 x i32> @lsx_vssran_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vssran_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssran.w.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16>, <8 x i16>) ++ ++define <16 x i8> @lsx_vssran_bu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssran_bu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssran.bu.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32>, <4 x i32>) ++ ++define <8 x i16> @lsx_vssran_hu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssran_hu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssran.hu.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64>, <2 x i64>) ++ ++define <4 x i32> @lsx_vssran_wu_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vssran_wu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssran.wu.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <4 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani.ll +new file mode 100644 +index 000000000000..57b8eb169866 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrani_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrani_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrani.b.h $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call 
<16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrani_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrani_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrani.h.w $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrani_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrani_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrani.w.d $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrani_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrani_d_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrani.d.q $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrani_bu_h(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrani_bu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrani.bu.h $vr0, $vr1, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 15) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrani_hu_w(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrani_hu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrani.hu.w $vr0, $vr1, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 31) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrani_wu_d(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrani_wu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrani.wu.d $vr0, $vr1, 63 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 63) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrani_du_q(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrani_du_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrani.du.q $vr0, $vr1, 127 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> %va, <2 x i64> %vb, i32 127) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarn.ll +new file mode 100644 +index 000000000000..c6b7d9ec8e1d +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarn.ll +@@ -0,0 +1,74 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16>, <8 x i16>) ++ ++define 
<16 x i8> @lsx_vssrarn_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrarn_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrarn.b.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32>, <4 x i32>) ++ ++define <8 x i16> @lsx_vssrarn_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrarn_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrarn.h.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64>, <2 x i64>) ++ ++define <4 x i32> @lsx_vssrarn_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrarn_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrarn.w.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16>, <8 x i16>) ++ ++define <16 x i8> @lsx_vssrarn_bu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrarn_bu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrarn.bu.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32>, <4 x i32>) ++ ++define <8 x i16> @lsx_vssrarn_hu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrarn_hu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrarn.hu.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64>, <2 x i64>) ++ ++define <4 x i32> @lsx_vssrarn_wu_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrarn_wu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrarn.wu.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <4 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni.ll +new file mode 100644 +index 000000000000..1a2e91962ac3 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrarni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrarni_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrarni.b.h $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrarni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrarni_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrarni.h.w $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> 
@llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrarni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrarni_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrarni.w.d $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrarni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrarni_d_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrarni.d.q $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrarni_bu_h(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrarni_bu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrarni.bu.h $vr0, $vr1, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 15) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrarni_hu_w(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrarni_hu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrarni.hu.w $vr0, $vr1, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 31) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrarni_wu_d(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrarni_wu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrarni.wu.d $vr0, $vr1, 63 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 63) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrarni_du_q(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrarni_du_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrarni.du.q $vr0, $vr1, 127 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 127) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrln.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrln.ll +new file mode 100644 +index 000000000000..697ccc3962a8 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrln.ll +@@ -0,0 +1,74 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16>, <8 x i16>) ++ ++define <16 x i8> @lsx_vssrln_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrln_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrln.b.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32>, <4 x i32>) ++ ++define 
<8 x i16> @lsx_vssrln_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrln_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrln.h.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64>, <2 x i64>) ++ ++define <4 x i32> @lsx_vssrln_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrln_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrln.w.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16>, <8 x i16>) ++ ++define <16 x i8> @lsx_vssrln_bu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrln_bu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrln.bu.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32>, <4 x i32>) ++ ++define <8 x i16> @lsx_vssrln_hu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrln_hu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrln.hu.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64>, <2 x i64>) ++ ++define <4 x i32> @lsx_vssrln_wu_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrln_wu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrln.wu.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <4 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni.ll +new file mode 100644 +index 000000000000..8dd41e7abe87 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrlni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlni_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlni.b.h $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrlni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlni_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlni.h.w $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrlni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlni_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlni.w.d $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> %va, 
<4 x i32> %vb, i32 1) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrlni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlni_d_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlni.d.q $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrlni_bu_h(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlni_bu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlni.bu.h $vr0, $vr1, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 15) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrlni_hu_w(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlni_hu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlni.hu.w $vr0, $vr1, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 31) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrlni_wu_d(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlni_wu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlni.wu.d $vr0, $vr1, 63 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 63) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrlni_du_q(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlni_du_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlni.du.q $vr0, $vr1, 127 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 127) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrn.ll +new file mode 100644 +index 000000000000..a8e76cbaa7fd +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrn.ll +@@ -0,0 +1,74 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16>, <8 x i16>) ++ ++define <16 x i8> @lsx_vssrlrn_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlrn_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlrn.b.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32>, <4 x i32>) ++ ++define <8 x i16> @lsx_vssrlrn_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlrn_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlrn.h.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64>, <2 x i64>) ++ ++define <4 x i32> @lsx_vssrlrn_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; 
CHECK-LABEL: lsx_vssrlrn_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlrn.w.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16>, <8 x i16>) ++ ++define <16 x i8> @lsx_vssrlrn_bu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlrn_bu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlrn.bu.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32>, <4 x i32>) ++ ++define <8 x i16> @lsx_vssrlrn_hu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlrn_hu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlrn.hu.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64>, <2 x i64>) ++ ++define <4 x i32> @lsx_vssrlrn_wu_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlrn_wu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlrn.wu.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <4 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni.ll +new file mode 100644 +index 000000000000..869e81b2b09d +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrlrni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlrni_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlrni.b.h $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrlrni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlrni_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlrni.h.w $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrlrni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlrni_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlrni.w.d $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrlrni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlrni_d_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlrni.d.q $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, 
i32 1) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrlrni_bu_h(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlrni_bu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlrni.bu.h $vr0, $vr1, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 15) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrlrni_hu_w(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlrni_hu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlrni.hu.w $vr0, $vr1, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 31) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrlrni_wu_d(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlrni_wu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlrni.wu.d $vr0, $vr1, 63 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 63) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrlrni_du_q(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlrni_du_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlrni.du.q $vr0, $vr1, 127 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 127) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssub.ll +new file mode 100644 +index 000000000000..c594b426d650 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssub.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vssub_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vssub_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssub.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vssub_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssub_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssub.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vssub_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssub_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssub.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vssub_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vssub_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssub.d 
$vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vssub_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vssub_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssub.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vssub_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssub_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssub.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vssub_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssub_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssub.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vssub_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vssub_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssub.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st.ll +new file mode 100644 +index 000000000000..798f509f2318 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare void @llvm.loongarch.lsx.vst(<16 x i8>, i8*, i32) ++ ++define void @lsx_vst(<16 x i8> %va, i8* %p) nounwind { ++; CHECK-LABEL: lsx_vst: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vst $vr0, $a0, -2048 ++; CHECK-NEXT: ret ++entry: ++ call void @llvm.loongarch.lsx.vst(<16 x i8> %va, i8* %p, i32 -2048) ++ ret void ++} ++ ++declare void @llvm.loongarch.lsx.vstx(<16 x i8>, i8*, i64) ++ ++define void @lsx_vstx(<16 x i8> %va, i8* %p, i64 %c) nounwind { ++; CHECK-LABEL: lsx_vstx: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vstx $vr0, $a0, $a1 ++; CHECK-NEXT: ret ++entry: ++ call void @llvm.loongarch.lsx.vstx(<16 x i8> %va, i8* %p, i64 %c) ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm.ll +new file mode 100644 +index 000000000000..6b9e7a9d7462 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare void @llvm.loongarch.lsx.vstelm.b(<16 x i8>, i8*, i32, i32) ++ ++define void @lsx_vstelm_b(<16 x i8> %va, i8* %p) nounwind { ++; CHECK-LABEL: lsx_vstelm_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vstelm.b $vr0, $a0, 1, 15 ++; CHECK-NEXT: ret ++entry: ++ call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 1, i32 
15) ++ ret void ++} ++ ++declare void @llvm.loongarch.lsx.vstelm.h(<8 x i16>, i8*, i32, i32) ++ ++define void @lsx_vstelm_h(<8 x i16> %va, i8* %p) nounwind { ++; CHECK-LABEL: lsx_vstelm_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vstelm.h $vr0, $a0, 2, 7 ++; CHECK-NEXT: ret ++entry: ++ call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 2, i32 7) ++ ret void ++} ++ ++declare void @llvm.loongarch.lsx.vstelm.w(<4 x i32>, i8*, i32, i32) ++ ++define void @lsx_vstelm_w(<4 x i32> %va, i8* %p) nounwind { ++; CHECK-LABEL: lsx_vstelm_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vstelm.w $vr0, $a0, 4, 3 ++; CHECK-NEXT: ret ++entry: ++ call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 4, i32 3) ++ ret void ++} ++ ++declare void @llvm.loongarch.lsx.vstelm.d(<2 x i64>, i8*, i32, i32) ++ ++define void @lsx_vstelm_d(<2 x i64> %va, i8* %p) nounwind { ++; CHECK-LABEL: lsx_vstelm_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vstelm.d $vr0, $a0, 8, 1 ++; CHECK-NEXT: ret ++entry: ++ call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 8, i32 1) ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sub.ll +new file mode 100644 +index 000000000000..5c04a3d8de0d +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sub.ll +@@ -0,0 +1,62 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vsub_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsub_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsub.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vsub_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsub_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsub.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vsub_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsub_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsub.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vsub_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsub_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsub.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vsub_q(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsub_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsub.q $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi.ll +new file 
mode 100644 +index 000000000000..304a4e4a78cc +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsubi_bu(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vsubi_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubi.bu $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> %va, i32 31) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsubi_hu(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vsubi_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubi.hu $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> %va, i32 31) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsubi_wu(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vsubi_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubi.wu $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> %va, i32 31) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsubi_du(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vsubi_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubi.du $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> %va, i32 31) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subw.ll +new file mode 100644 +index 000000000000..48100db74334 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subw.ll +@@ -0,0 +1,194 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vsubwev_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsubwev_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubwev.h.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vsubwev_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsubwev_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubwev.w.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vsubwev_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsubwev_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubwev.d.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vsubwev_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsubwev_q_d: 
++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubwev.q.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vsubwev_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsubwev_h_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubwev.h.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vsubwev_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsubwev_w_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubwev.w.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vsubwev_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsubwev_d_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubwev.d.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vsubwev_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsubwev_q_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubwev.q.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vsubwod_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsubwod_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubwod.h.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vsubwod_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsubwod_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubwod.w.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vsubwod_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsubwod_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubwod.d.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vsubwod_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsubwod_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubwod.q.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x 
i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vsubwod_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsubwod_h_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubwod.h.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vsubwod_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsubwod_w_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubwod.w.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vsubwod_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsubwod_d_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubwod.d.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vsubwod_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsubwod_q_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubwod.q.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xor.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xor.ll +new file mode 100644 +index 000000000000..72a1fe93c2c0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xor.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vxor_v(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vxor_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori.ll +new file mode 100644 +index 000000000000..09669cd5ac14 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vxori_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vxori_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vxori.b $vr0, $vr0, 3 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> %va, i32 3) ++ ret <16 x i8> %res ++} +-- +2.20.1 + diff --git a/0003-Backport-LoongArch-Emit-R_LARCH_RELAX-when-expanding-some-LoadAddress.patch b/0004-LoongArch-Emit-R_LARCH_RELAX-when-expanding-some-Loa.patch similarity index 99% rename from 0003-Backport-LoongArch-Emit-R_LARCH_RELAX-when-expanding-some-LoadAddress.patch rename to 0004-LoongArch-Emit-R_LARCH_RELAX-when-expanding-some-Loa.patch index 93a2174..7ad8680 100644 --- 
a/0003-Backport-LoongArch-Emit-R_LARCH_RELAX-when-expanding-some-LoadAddress.patch +++ b/0004-LoongArch-Emit-R_LARCH_RELAX-when-expanding-some-Loa.patch @@ -10,7 +10,6 @@ llvm-mc stage, which like what does on GAS. 2, la.got -> GOT_PC_HI20 + RELAX + GOT_PC_LO12 + RELAX (cherry picked from commit b3ef8dce9811b2725639b0d4fac3f85c7e112817) -Change-Id: I222daf60b36ee70e23c76b753e1d2a3b8148f44b --- .../AsmParser/LoongArchAsmParser.cpp | 12 +-- .../MCTargetDesc/LoongArchMCCodeEmitter.cpp | 13 +++ diff --git a/0004-LoongArch-Precommit-a-test-for-atomic-cmpxchg-optmiz.patch b/0004-LoongArch-Precommit-a-test-for-atomic-cmpxchg-optmiz.patch new file mode 100644 index 0000000..5b11543 --- /dev/null +++ b/0004-LoongArch-Precommit-a-test-for-atomic-cmpxchg-optmiz.patch @@ -0,0 +1,651 @@ +From 6722b1cd4fa76390264167efd95871ef8cbc9750 Mon Sep 17 00:00:00 2001 +From: Weining Lu +Date: Tue, 17 Oct 2023 17:41:32 +0800 +Subject: [PATCH 4/7] [LoongArch] Precommit a test for atomic cmpxchg + optmization + +(cherry picked from commit b2773d170cb4bdb4b19ba801b5eb55395024b3ae) + +--- + .../ir-instruction/atomic-cmpxchg.ll | 385 +++++++++++------- + 1 file changed, 245 insertions(+), 140 deletions(-) + +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +index 4f25a1d69af1..174bb9d0ff7d 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +@@ -106,6 +106,111 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind + ret void + } + ++define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ++; LA64-LABEL: cmpxchg_i8_acquire_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a3, $zero, -4 ++; LA64-NEXT: and $a3, $a0, $a3 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: andi $a2, $a2, 255 ++; LA64-NEXT: sll.w $a2, $a2, $a0 ++; LA64-NEXT: ori $a4, $zero, 255 ++; LA64-NEXT: sll.w $a0, $a4, $a0 ++; LA64-NEXT: addi.w $a0, $a0, 0 ++; LA64-NEXT: addi.w $a2, $a2, 0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a3, 0 ++; LA64-NEXT: and $a5, $a4, $a0 ++; LA64-NEXT: bne $a5, $a1, .LBB4_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 ++; LA64-NEXT: andn $a5, $a4, $a0 ++; LA64-NEXT: or $a5, $a5, $a2 ++; LA64-NEXT: sc.w $a5, $a3, 0 ++; LA64-NEXT: beqz $a5, .LBB4_1 ++; LA64-NEXT: b .LBB4_4 ++; LA64-NEXT: .LBB4_3: ++; LA64-NEXT: dbar 20 ++; LA64-NEXT: .LBB4_4: ++; LA64-NEXT: ret ++ %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire monotonic ++ ret void ++} ++ ++define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwind { ++; LA64-LABEL: cmpxchg_i16_acquire_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a3, $zero, -4 ++; LA64-NEXT: and $a3, $a0, $a3 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0 ++; LA64-NEXT: sll.w $a2, $a2, $a0 ++; LA64-NEXT: lu12i.w $a4, 15 ++; LA64-NEXT: ori $a4, $a4, 4095 ++; LA64-NEXT: sll.w $a0, $a4, $a0 ++; LA64-NEXT: addi.w $a0, $a0, 0 ++; LA64-NEXT: addi.w $a2, $a2, 0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a3, 0 ++; LA64-NEXT: and $a5, $a4, $a0 ++; LA64-NEXT: bne $a5, $a1, .LBB5_3 ++; LA64-NEXT: # %bb.2: # in Loop: 
Header=BB5_1 Depth=1 ++; LA64-NEXT: andn $a5, $a4, $a0 ++; LA64-NEXT: or $a5, $a5, $a2 ++; LA64-NEXT: sc.w $a5, $a3, 0 ++; LA64-NEXT: beqz $a5, .LBB5_1 ++; LA64-NEXT: b .LBB5_4 ++; LA64-NEXT: .LBB5_3: ++; LA64-NEXT: dbar 20 ++; LA64-NEXT: .LBB5_4: ++; LA64-NEXT: ret ++ %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire monotonic ++ ret void ++} ++ ++define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { ++; LA64-LABEL: cmpxchg_i32_acquire_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a3, $a0, 0 ++; LA64-NEXT: bne $a3, $a1, .LBB6_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 ++; LA64-NEXT: move $a4, $a2 ++; LA64-NEXT: sc.w $a4, $a0, 0 ++; LA64-NEXT: beqz $a4, .LBB6_1 ++; LA64-NEXT: b .LBB6_4 ++; LA64-NEXT: .LBB6_3: ++; LA64-NEXT: dbar 20 ++; LA64-NEXT: .LBB6_4: ++; LA64-NEXT: ret ++ %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire monotonic ++ ret void ++} ++ ++define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind { ++; LA64-LABEL: cmpxchg_i64_acquire_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.d $a3, $a0, 0 ++; LA64-NEXT: bne $a3, $a1, .LBB7_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1 ++; LA64-NEXT: move $a4, $a2 ++; LA64-NEXT: sc.d $a4, $a0, 0 ++; LA64-NEXT: beqz $a4, .LBB7_1 ++; LA64-NEXT: b .LBB7_4 ++; LA64-NEXT: .LBB7_3: ++; LA64-NEXT: dbar 20 ++; LA64-NEXT: .LBB7_4: ++; LA64-NEXT: ret ++ %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire monotonic ++ ret void ++} ++ + define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind { + ; LA64-LABEL: cmpxchg_i8_acquire_acquire_reti8: + ; LA64: # %bb.0: +@@ -121,19 +226,19 @@ define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind + ; LA64-NEXT: andi $a1, $a1, 255 + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a5, $a3, 0 + ; LA64-NEXT: and $a6, $a5, $a4 +-; LA64-NEXT: bne $a6, $a1, .LBB4_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 ++; LA64-NEXT: bne $a6, $a1, .LBB8_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 + ; LA64-NEXT: andn $a6, $a5, $a4 + ; LA64-NEXT: or $a6, $a6, $a2 + ; LA64-NEXT: sc.w $a6, $a3, 0 +-; LA64-NEXT: beqz $a6, .LBB4_1 +-; LA64-NEXT: b .LBB4_4 +-; LA64-NEXT: .LBB4_3: ++; LA64-NEXT: beqz $a6, .LBB8_1 ++; LA64-NEXT: b .LBB8_4 ++; LA64-NEXT: .LBB8_3: + ; LA64-NEXT: dbar 20 +-; LA64-NEXT: .LBB4_4: ++; LA64-NEXT: .LBB8_4: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire +@@ -157,19 +262,19 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou + ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a5, $a3, 0 + ; LA64-NEXT: and $a6, $a5, $a4 +-; LA64-NEXT: bne $a6, $a1, .LBB5_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 ++; LA64-NEXT: bne $a6, $a1, .LBB9_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 + ; LA64-NEXT: andn $a6, $a5, $a4 + ; LA64-NEXT: or $a6, $a6, $a2 + ; LA64-NEXT: sc.w $a6, $a3, 0 +-; LA64-NEXT: beqz $a6, .LBB5_1 +-; LA64-NEXT: b .LBB5_4 +-; LA64-NEXT: 
.LBB5_3: ++; LA64-NEXT: beqz $a6, .LBB9_1 ++; LA64-NEXT: b .LBB9_4 ++; LA64-NEXT: .LBB9_3: + ; LA64-NEXT: dbar 20 +-; LA64-NEXT: .LBB5_4: ++; LA64-NEXT: .LBB9_4: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire +@@ -180,17 +285,17 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou + define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti32: + ; LA64: # %bb.0: +-; LA64-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB6_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 ++; LA64-NEXT: bne $a3, $a1, .LBB10_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.w $a4, $a0, 0 +-; LA64-NEXT: beqz $a4, .LBB6_1 +-; LA64-NEXT: b .LBB6_4 +-; LA64-NEXT: .LBB6_3: ++; LA64-NEXT: beqz $a4, .LBB10_1 ++; LA64-NEXT: b .LBB10_4 ++; LA64-NEXT: .LBB10_3: + ; LA64-NEXT: dbar 20 +-; LA64-NEXT: .LBB6_4: ++; LA64-NEXT: .LBB10_4: + ; LA64-NEXT: move $a0, $a3 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire +@@ -201,17 +306,17 @@ define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nou + define i64 @cmpxchg_i64_acquire_acquire_reti64(ptr %ptr, i64 %cmp, i64 %val) nounwind { + ; LA64-LABEL: cmpxchg_i64_acquire_acquire_reti64: + ; LA64: # %bb.0: +-; LA64-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.d $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB7_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1 ++; LA64-NEXT: bne $a3, $a1, .LBB11_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.d $a4, $a0, 0 +-; LA64-NEXT: beqz $a4, .LBB7_1 +-; LA64-NEXT: b .LBB7_4 +-; LA64-NEXT: .LBB7_3: ++; LA64-NEXT: beqz $a4, .LBB11_1 ++; LA64-NEXT: b .LBB11_4 ++; LA64-NEXT: .LBB11_3: + ; LA64-NEXT: dbar 20 +-; LA64-NEXT: .LBB7_4: ++; LA64-NEXT: .LBB11_4: + ; LA64-NEXT: move $a0, $a3 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire +@@ -234,19 +339,19 @@ define i1 @cmpxchg_i8_acquire_acquire_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind + ; LA64-NEXT: addi.w $a0, $a0, 0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: addi.w $a2, $a4, 0 +-; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a5, $a3, 0 + ; LA64-NEXT: and $a6, $a5, $a2 +-; LA64-NEXT: bne $a6, $a1, .LBB8_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 ++; LA64-NEXT: bne $a6, $a1, .LBB12_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 + ; LA64-NEXT: andn $a6, $a5, $a2 + ; LA64-NEXT: or $a6, $a6, $a0 + ; LA64-NEXT: sc.w $a6, $a3, 0 +-; LA64-NEXT: beqz $a6, .LBB8_1 +-; LA64-NEXT: b .LBB8_4 +-; LA64-NEXT: .LBB8_3: ++; LA64-NEXT: beqz $a6, .LBB12_1 ++; LA64-NEXT: b .LBB12_4 ++; LA64-NEXT: .LBB12_3: + ; LA64-NEXT: dbar 20 +-; LA64-NEXT: .LBB8_4: ++; LA64-NEXT: .LBB12_4: + ; LA64-NEXT: and $a0, $a5, $a4 + ; LA64-NEXT: addi.w $a0, $a0, 0 + ; LA64-NEXT: xor $a0, $a1, $a0 +@@ -273,19 +378,19 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw + ; LA64-NEXT: addi.w $a0, $a0, 0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: addi.w $a2, $a4, 0 +-; LA64-NEXT: 
.LBB9_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a5, $a3, 0 + ; LA64-NEXT: and $a6, $a5, $a2 +-; LA64-NEXT: bne $a6, $a1, .LBB9_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 ++; LA64-NEXT: bne $a6, $a1, .LBB13_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 + ; LA64-NEXT: andn $a6, $a5, $a2 + ; LA64-NEXT: or $a6, $a6, $a0 + ; LA64-NEXT: sc.w $a6, $a3, 0 +-; LA64-NEXT: beqz $a6, .LBB9_1 +-; LA64-NEXT: b .LBB9_4 +-; LA64-NEXT: .LBB9_3: ++; LA64-NEXT: beqz $a6, .LBB13_1 ++; LA64-NEXT: b .LBB13_4 ++; LA64-NEXT: .LBB13_3: + ; LA64-NEXT: dbar 20 +-; LA64-NEXT: .LBB9_4: ++; LA64-NEXT: .LBB13_4: + ; LA64-NEXT: and $a0, $a5, $a4 + ; LA64-NEXT: addi.w $a0, $a0, 0 + ; LA64-NEXT: xor $a0, $a1, $a0 +@@ -299,17 +404,17 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw + define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti1: + ; LA64: # %bb.0: +-; LA64-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB10_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 ++; LA64-NEXT: bne $a3, $a1, .LBB14_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.w $a4, $a0, 0 +-; LA64-NEXT: beqz $a4, .LBB10_1 +-; LA64-NEXT: b .LBB10_4 +-; LA64-NEXT: .LBB10_3: ++; LA64-NEXT: beqz $a4, .LBB14_1 ++; LA64-NEXT: b .LBB14_4 ++; LA64-NEXT: .LBB14_3: + ; LA64-NEXT: dbar 20 +-; LA64-NEXT: .LBB10_4: ++; LA64-NEXT: .LBB14_4: + ; LA64-NEXT: addi.w $a0, $a1, 0 + ; LA64-NEXT: xor $a0, $a3, $a0 + ; LA64-NEXT: sltui $a0, $a0, 1 +@@ -322,17 +427,17 @@ define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounw + define i1 @cmpxchg_i64_acquire_acquire_reti1(ptr %ptr, i64 %cmp, i64 %val) nounwind { + ; LA64-LABEL: cmpxchg_i64_acquire_acquire_reti1: + ; LA64: # %bb.0: +-; LA64-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.d $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB11_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 ++; LA64-NEXT: bne $a3, $a1, .LBB15_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.d $a4, $a0, 0 +-; LA64-NEXT: beqz $a4, .LBB11_1 +-; LA64-NEXT: b .LBB11_4 +-; LA64-NEXT: .LBB11_3: ++; LA64-NEXT: beqz $a4, .LBB15_1 ++; LA64-NEXT: b .LBB15_4 ++; LA64-NEXT: .LBB15_3: + ; LA64-NEXT: dbar 20 +-; LA64-NEXT: .LBB11_4: ++; LA64-NEXT: .LBB15_4: + ; LA64-NEXT: xor $a0, $a3, $a1 + ; LA64-NEXT: sltui $a0, $a0, 1 + ; LA64-NEXT: ret +@@ -356,19 +461,19 @@ define void @cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind + ; LA64-NEXT: addi.w $a0, $a0, 0 + ; LA64-NEXT: addi.w $a2, $a2, 0 + ; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a4, $a3, 0 + ; LA64-NEXT: and $a5, $a4, $a0 +-; LA64-NEXT: bne $a5, $a1, .LBB12_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 ++; LA64-NEXT: bne $a5, $a1, .LBB16_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 + ; LA64-NEXT: andn $a5, $a4, $a0 + ; LA64-NEXT: or $a5, $a5, $a2 + ; LA64-NEXT: sc.w $a5, $a3, 0 +-; LA64-NEXT: beqz $a5, .LBB12_1 +-; LA64-NEXT: b .LBB12_4 +-; 
LA64-NEXT: .LBB12_3: ++; LA64-NEXT: beqz $a5, .LBB16_1 ++; LA64-NEXT: b .LBB16_4 ++; LA64-NEXT: .LBB16_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB12_4: ++; LA64-NEXT: .LBB16_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic + ret void +@@ -390,19 +495,19 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw + ; LA64-NEXT: addi.w $a0, $a0, 0 + ; LA64-NEXT: addi.w $a2, $a2, 0 + ; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a4, $a3, 0 + ; LA64-NEXT: and $a5, $a4, $a0 +-; LA64-NEXT: bne $a5, $a1, .LBB13_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 ++; LA64-NEXT: bne $a5, $a1, .LBB17_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 + ; LA64-NEXT: andn $a5, $a4, $a0 + ; LA64-NEXT: or $a5, $a5, $a2 + ; LA64-NEXT: sc.w $a5, $a3, 0 +-; LA64-NEXT: beqz $a5, .LBB13_1 +-; LA64-NEXT: b .LBB13_4 +-; LA64-NEXT: .LBB13_3: ++; LA64-NEXT: beqz $a5, .LBB17_1 ++; LA64-NEXT: b .LBB17_4 ++; LA64-NEXT: .LBB17_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB13_4: ++; LA64-NEXT: .LBB17_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic + ret void +@@ -411,17 +516,17 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw + define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic: + ; LA64: # %bb.0: +-; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB14_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 ++; LA64-NEXT: bne $a3, $a1, .LBB18_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.w $a4, $a0, 0 +-; LA64-NEXT: beqz $a4, .LBB14_1 +-; LA64-NEXT: b .LBB14_4 +-; LA64-NEXT: .LBB14_3: ++; LA64-NEXT: beqz $a4, .LBB18_1 ++; LA64-NEXT: b .LBB18_4 ++; LA64-NEXT: .LBB18_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB14_4: ++; LA64-NEXT: .LBB18_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic + ret void +@@ -430,17 +535,17 @@ define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounw + define void @cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind { + ; LA64-LABEL: cmpxchg_i64_monotonic_monotonic: + ; LA64: # %bb.0: +-; LA64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.d $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB15_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; LA64-NEXT: bne $a3, $a1, .LBB19_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.d $a4, $a0, 0 +-; LA64-NEXT: beqz $a4, .LBB15_1 +-; LA64-NEXT: b .LBB15_4 +-; LA64-NEXT: .LBB15_3: ++; LA64-NEXT: beqz $a4, .LBB19_1 ++; LA64-NEXT: b .LBB19_4 ++; LA64-NEXT: .LBB19_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB15_4: ++; LA64-NEXT: .LBB19_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic + ret void +@@ -461,19 +566,19 @@ define i8 @cmpxchg_i8_monotonic_monotonic_reti8(ptr %ptr, i8 %cmp, i8 %val) noun + ; LA64-NEXT: andi $a1, $a1, 255 + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: .LBB16_1: # =>This 
Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a5, $a3, 0 + ; LA64-NEXT: and $a6, $a5, $a4 +-; LA64-NEXT: bne $a6, $a1, .LBB16_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 ++; LA64-NEXT: bne $a6, $a1, .LBB20_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 + ; LA64-NEXT: andn $a6, $a5, $a4 + ; LA64-NEXT: or $a6, $a6, $a2 + ; LA64-NEXT: sc.w $a6, $a3, 0 +-; LA64-NEXT: beqz $a6, .LBB16_1 +-; LA64-NEXT: b .LBB16_4 +-; LA64-NEXT: .LBB16_3: ++; LA64-NEXT: beqz $a6, .LBB20_1 ++; LA64-NEXT: b .LBB20_4 ++; LA64-NEXT: .LBB20_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB16_4: ++; LA64-NEXT: .LBB20_4: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic +@@ -497,19 +602,19 @@ define i16 @cmpxchg_i16_monotonic_monotonic_reti16(ptr %ptr, i16 %cmp, i16 %val) + ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a5, $a3, 0 + ; LA64-NEXT: and $a6, $a5, $a4 +-; LA64-NEXT: bne $a6, $a1, .LBB17_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 ++; LA64-NEXT: bne $a6, $a1, .LBB21_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 + ; LA64-NEXT: andn $a6, $a5, $a4 + ; LA64-NEXT: or $a6, $a6, $a2 + ; LA64-NEXT: sc.w $a6, $a3, 0 +-; LA64-NEXT: beqz $a6, .LBB17_1 +-; LA64-NEXT: b .LBB17_4 +-; LA64-NEXT: .LBB17_3: ++; LA64-NEXT: beqz $a6, .LBB21_1 ++; LA64-NEXT: b .LBB21_4 ++; LA64-NEXT: .LBB21_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB17_4: ++; LA64-NEXT: .LBB21_4: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic +@@ -520,17 +625,17 @@ define i16 @cmpxchg_i16_monotonic_monotonic_reti16(ptr %ptr, i16 %cmp, i16 %val) + define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti32: + ; LA64: # %bb.0: +-; LA64-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB18_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 ++; LA64-NEXT: bne $a3, $a1, .LBB22_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.w $a4, $a0, 0 +-; LA64-NEXT: beqz $a4, .LBB18_1 +-; LA64-NEXT: b .LBB18_4 +-; LA64-NEXT: .LBB18_3: ++; LA64-NEXT: beqz $a4, .LBB22_1 ++; LA64-NEXT: b .LBB22_4 ++; LA64-NEXT: .LBB22_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB18_4: ++; LA64-NEXT: .LBB22_4: + ; LA64-NEXT: move $a0, $a3 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic +@@ -541,17 +646,17 @@ define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val) + define i64 @cmpxchg_i64_monotonic_monotonic_reti64(ptr %ptr, i64 %cmp, i64 %val) nounwind { + ; LA64-LABEL: cmpxchg_i64_monotonic_monotonic_reti64: + ; LA64: # %bb.0: +-; LA64-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.d $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB19_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 ++; LA64-NEXT: bne $a3, $a1, .LBB23_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; 
LA64-NEXT: sc.d $a4, $a0, 0 +-; LA64-NEXT: beqz $a4, .LBB19_1 +-; LA64-NEXT: b .LBB19_4 +-; LA64-NEXT: .LBB19_3: ++; LA64-NEXT: beqz $a4, .LBB23_1 ++; LA64-NEXT: b .LBB23_4 ++; LA64-NEXT: .LBB23_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB19_4: ++; LA64-NEXT: .LBB23_4: + ; LA64-NEXT: move $a0, $a3 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic +@@ -574,19 +679,19 @@ define i1 @cmpxchg_i8_monotonic_monotonic_reti1(ptr %ptr, i8 %cmp, i8 %val) noun + ; LA64-NEXT: addi.w $a0, $a0, 0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: addi.w $a2, $a4, 0 +-; LA64-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a5, $a3, 0 + ; LA64-NEXT: and $a6, $a5, $a2 +-; LA64-NEXT: bne $a6, $a1, .LBB20_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 ++; LA64-NEXT: bne $a6, $a1, .LBB24_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 + ; LA64-NEXT: andn $a6, $a5, $a2 + ; LA64-NEXT: or $a6, $a6, $a0 + ; LA64-NEXT: sc.w $a6, $a3, 0 +-; LA64-NEXT: beqz $a6, .LBB20_1 +-; LA64-NEXT: b .LBB20_4 +-; LA64-NEXT: .LBB20_3: ++; LA64-NEXT: beqz $a6, .LBB24_1 ++; LA64-NEXT: b .LBB24_4 ++; LA64-NEXT: .LBB24_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB20_4: ++; LA64-NEXT: .LBB24_4: + ; LA64-NEXT: and $a0, $a5, $a4 + ; LA64-NEXT: addi.w $a0, $a0, 0 + ; LA64-NEXT: xor $a0, $a1, $a0 +@@ -613,19 +718,19 @@ define i1 @cmpxchg_i16_monotonic_monotonic_reti1(ptr %ptr, i16 %cmp, i16 %val) n + ; LA64-NEXT: addi.w $a0, $a0, 0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: addi.w $a2, $a4, 0 +-; LA64-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a5, $a3, 0 + ; LA64-NEXT: and $a6, $a5, $a2 +-; LA64-NEXT: bne $a6, $a1, .LBB21_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 ++; LA64-NEXT: bne $a6, $a1, .LBB25_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB25_1 Depth=1 + ; LA64-NEXT: andn $a6, $a5, $a2 + ; LA64-NEXT: or $a6, $a6, $a0 + ; LA64-NEXT: sc.w $a6, $a3, 0 +-; LA64-NEXT: beqz $a6, .LBB21_1 +-; LA64-NEXT: b .LBB21_4 +-; LA64-NEXT: .LBB21_3: ++; LA64-NEXT: beqz $a6, .LBB25_1 ++; LA64-NEXT: b .LBB25_4 ++; LA64-NEXT: .LBB25_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB21_4: ++; LA64-NEXT: .LBB25_4: + ; LA64-NEXT: and $a0, $a5, $a4 + ; LA64-NEXT: addi.w $a0, $a0, 0 + ; LA64-NEXT: xor $a0, $a1, $a0 +@@ -639,17 +744,17 @@ define i1 @cmpxchg_i16_monotonic_monotonic_reti1(ptr %ptr, i16 %cmp, i16 %val) n + define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti1: + ; LA64: # %bb.0: +-; LA64-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB22_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 ++; LA64-NEXT: bne $a3, $a1, .LBB26_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB26_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.w $a4, $a0, 0 +-; LA64-NEXT: beqz $a4, .LBB22_1 +-; LA64-NEXT: b .LBB22_4 +-; LA64-NEXT: .LBB22_3: ++; LA64-NEXT: beqz $a4, .LBB26_1 ++; LA64-NEXT: b .LBB26_4 ++; LA64-NEXT: .LBB26_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB22_4: ++; LA64-NEXT: .LBB26_4: + ; LA64-NEXT: addi.w $a0, $a1, 0 + ; LA64-NEXT: xor $a0, $a3, $a0 + ; LA64-NEXT: sltui $a0, $a0, 1 +@@ -662,17 +767,17 @@ define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 
%val) n + define i1 @cmpxchg_i64_monotonic_monotonic_reti1(ptr %ptr, i64 %cmp, i64 %val) nounwind { + ; LA64-LABEL: cmpxchg_i64_monotonic_monotonic_reti1: + ; LA64: # %bb.0: +-; LA64-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.d $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB23_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 ++; LA64-NEXT: bne $a3, $a1, .LBB27_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.d $a4, $a0, 0 +-; LA64-NEXT: beqz $a4, .LBB23_1 +-; LA64-NEXT: b .LBB23_4 +-; LA64-NEXT: .LBB23_3: ++; LA64-NEXT: beqz $a4, .LBB27_1 ++; LA64-NEXT: b .LBB27_4 ++; LA64-NEXT: .LBB27_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB23_4: ++; LA64-NEXT: .LBB27_4: + ; LA64-NEXT: xor $a0, $a3, $a1 + ; LA64-NEXT: sltui $a0, $a0, 1 + ; LA64-NEXT: ret +-- +2.20.1 + diff --git a/0004-lld-test-LoongArch-Remove-the-test-for-R_LARCH_CALL3.patch b/0004-lld-test-LoongArch-Remove-the-test-for-R_LARCH_CALL3.patch new file mode 100644 index 0000000..e2e147a --- /dev/null +++ b/0004-lld-test-LoongArch-Remove-the-test-for-R_LARCH_CALL3.patch @@ -0,0 +1,36 @@ +From 9c00b0303b007132a97499073bf2e9a0de2b36ba Mon Sep 17 00:00:00 2001 +From: Weining Lu +Date: Mon, 25 Dec 2023 18:28:19 +0800 +Subject: [PATCH 04/23] [lld][test][LoongArch] Remove the test for + R_LARCH_CALL36 range checking + +Several buildbots report: +ld.lld: error: failed to open /dev/null: Cannot allocate memory + +For example: +- https://lab.llvm.org/buildbot/#/builders/184/builds/8842 +- https://lab.llvm.org/buildbot/#/builders/247/builds/12559 + +(cherry picked from commit 0fbc728dba97149e530cfb7f2ada0283c398a7ce) +--- + lld/test/ELF/loongarch-call36.s | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/lld/test/ELF/loongarch-call36.s b/lld/test/ELF/loongarch-call36.s +index 2d25a2ac64ed..0a00adacbd6a 100644 +--- a/lld/test/ELF/loongarch-call36.s ++++ b/lld/test/ELF/loongarch-call36.s +@@ -40,10 +40,6 @@ + # GOTPLT-NEXT: 0x01274730 00000000 00000000 00000000 00000000 + # GOTPLT-NEXT: 0x01274740 00452301 00000000 + +-# RUN: not ld.lld %t/a.o --section-start=.text=0x20000 --section-start=.sec.foo=0x2000020000 -o /dev/null 2>&1 | \ +-# RUN: FileCheck -DFILE=%t/a.o --check-prefix=ERROR-RANGE %s +-# ERROR-RANGE: error: [[FILE]]:(.text+0x0): relocation R_LARCH_CALL36 out of range: 137438953472 is not in [-137439084544, 137438822399]; references 'foo' +- + ## Impossible case in reality becasue all LoongArch instructions are fixed 4-bytes long. 
+ # RUN: not ld.lld %t/a.o --section-start=.text=0x20000 --section-start=.sec.foo=0x40001 -o /dev/null 2>&1 | \ + # RUN: FileCheck -DFILE=%t/a.o --check-prefix=ERROR-ALIGN %s +-- +2.20.1 + diff --git a/0005-LoongArch-Add-LASX-intrinsic-testcases.patch b/0005-LoongArch-Add-LASX-intrinsic-testcases.patch new file mode 100644 index 0000000..8150758 --- /dev/null +++ b/0005-LoongArch-Add-LASX-intrinsic-testcases.patch @@ -0,0 +1,10197 @@ +From 9c263a60aeeefe92b71e566b482e833e2b6d8f4f Mon Sep 17 00:00:00 2001 +From: chenli +Date: Sat, 19 Aug 2023 17:12:27 +0800 +Subject: [PATCH 05/42] [LoongArch] Add LASX intrinsic testcases + +Depends on D155830 + +Reviewed By: SixWeining + +Differential Revision: https://reviews.llvm.org/D155835 + +(cherry picked from commit 83311b2b5d1b9869f9a7b265994394ea898448a2) + +--- + .../CodeGen/LoongArch/lasx/intrinsic-absd.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-add.ll | 62 ++ + .../CodeGen/LoongArch/lasx/intrinsic-adda.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-addi.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-addw.ll | 290 ++++++++++ + .../CodeGen/LoongArch/lasx/intrinsic-and.ll | 14 + + .../CodeGen/LoongArch/lasx/intrinsic-andi.ll | 14 + + .../CodeGen/LoongArch/lasx/intrinsic-andn.ll | 14 + + .../CodeGen/LoongArch/lasx/intrinsic-avg.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-avgr.ll | 98 ++++ + .../LoongArch/lasx/intrinsic-bitclr.ll | 98 ++++ + .../LoongArch/lasx/intrinsic-bitrev.ll | 98 ++++ + .../LoongArch/lasx/intrinsic-bitsel.ll | 14 + + .../LoongArch/lasx/intrinsic-bitseli.ll | 14 + + .../LoongArch/lasx/intrinsic-bitset.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-bsll.ll | 14 + + .../CodeGen/LoongArch/lasx/intrinsic-bsrl.ll | 14 + + .../CodeGen/LoongArch/lasx/intrinsic-clo.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-clz.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-div.ll | 98 ++++ + .../LoongArch/lasx/intrinsic-ext2xv.ll | 146 +++++ + .../CodeGen/LoongArch/lasx/intrinsic-exth.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-extl.ll | 26 + + .../LoongArch/lasx/intrinsic-extrins.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-fadd.ll | 26 + + .../LoongArch/lasx/intrinsic-fclass.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-fcmp.ll | 530 ++++++++++++++++++ + .../CodeGen/LoongArch/lasx/intrinsic-fcvt.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-fcvth.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-fcvtl.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-fdiv.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-ffint.ll | 86 +++ + .../CodeGen/LoongArch/lasx/intrinsic-flogb.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-fmadd.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-fmax.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-fmaxa.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-fmin.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-fmina.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-fmsub.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-fmul.ll | 26 + + .../LoongArch/lasx/intrinsic-fnmadd.ll | 26 + + .../LoongArch/lasx/intrinsic-fnmsub.ll | 26 + + .../LoongArch/lasx/intrinsic-frecip.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-frint.ll | 122 ++++ + .../LoongArch/lasx/intrinsic-frsqrt.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-frstp.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-fsqrt.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-fsub.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-ftint.ll | 350 ++++++++++++ + .../CodeGen/LoongArch/lasx/intrinsic-haddw.ll | 98 ++++ + 
.../CodeGen/LoongArch/lasx/intrinsic-hsubw.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-ilv.ll | 98 ++++ + .../LoongArch/lasx/intrinsic-insgr2vr.ll | 28 + + .../LoongArch/lasx/intrinsic-insve0.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-ld.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-ldi.ll | 62 ++ + .../LoongArch/lasx/intrinsic-ldrepl.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-madd.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-maddw.ll | 290 ++++++++++ + .../CodeGen/LoongArch/lasx/intrinsic-max.ll | 194 +++++++ + .../CodeGen/LoongArch/lasx/intrinsic-min.ll | 194 +++++++ + .../CodeGen/LoongArch/lasx/intrinsic-mod.ll | 98 ++++ + .../LoongArch/lasx/intrinsic-mskgez.ll | 14 + + .../LoongArch/lasx/intrinsic-mskltz.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-msknz.ll | 14 + + .../CodeGen/LoongArch/lasx/intrinsic-msub.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-muh.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-mul.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-mulw.ll | 290 ++++++++++ + .../CodeGen/LoongArch/lasx/intrinsic-neg.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-nor.ll | 14 + + .../CodeGen/LoongArch/lasx/intrinsic-nori.ll | 14 + + .../CodeGen/LoongArch/lasx/intrinsic-or.ll | 14 + + .../CodeGen/LoongArch/lasx/intrinsic-ori.ll | 14 + + .../CodeGen/LoongArch/lasx/intrinsic-orn.ll | 14 + + .../CodeGen/LoongArch/lasx/intrinsic-pack.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-pcnt.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-perm.ll | 14 + + .../CodeGen/LoongArch/lasx/intrinsic-permi.ll | 38 ++ + .../CodeGen/LoongArch/lasx/intrinsic-pick.ll | 98 ++++ + .../LoongArch/lasx/intrinsic-pickve.ll | 50 ++ + .../LoongArch/lasx/intrinsic-pickve2gr.ll | 53 ++ + .../LoongArch/lasx/intrinsic-repl128vei.ll | 50 ++ + .../LoongArch/lasx/intrinsic-replgr2vr.ll | 50 ++ + .../LoongArch/lasx/intrinsic-replve.ll | 50 ++ + .../LoongArch/lasx/intrinsic-replve0.ll | 62 ++ + .../CodeGen/LoongArch/lasx/intrinsic-rotr.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-sadd.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-sat.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-seq.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-set.ll | 38 ++ + .../LoongArch/lasx/intrinsic-setallnez.ll | 74 +++ + .../LoongArch/lasx/intrinsic-setanyeqz.ll | 74 +++ + .../CodeGen/LoongArch/lasx/intrinsic-shuf.ll | 50 ++ + .../LoongArch/lasx/intrinsic-shuf4i.ll | 50 ++ + .../LoongArch/lasx/intrinsic-signcov.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-sle.ll | 194 +++++++ + .../CodeGen/LoongArch/lasx/intrinsic-sll.ll | 98 ++++ + .../LoongArch/lasx/intrinsic-sllwil.ll | 74 +++ + .../CodeGen/LoongArch/lasx/intrinsic-slt.ll | 194 +++++++ + .../CodeGen/LoongArch/lasx/intrinsic-sra.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-sran.ll | 38 ++ + .../CodeGen/LoongArch/lasx/intrinsic-srani.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-srar.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-srarn.ll | 38 ++ + .../LoongArch/lasx/intrinsic-srarni.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-srl.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-srln.ll | 38 ++ + .../CodeGen/LoongArch/lasx/intrinsic-srlni.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-srlr.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-srlrn.ll | 38 ++ + .../LoongArch/lasx/intrinsic-srlrni.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-ssran.ll | 74 +++ + .../LoongArch/lasx/intrinsic-ssrani.ll | 98 ++++ + .../LoongArch/lasx/intrinsic-ssrarn.ll | 74 +++ + 
.../LoongArch/lasx/intrinsic-ssrarni.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-ssrln.ll | 74 +++ + .../LoongArch/lasx/intrinsic-ssrlni.ll | 98 ++++ + .../LoongArch/lasx/intrinsic-ssrlrn.ll | 74 +++ + .../LoongArch/lasx/intrinsic-ssrlrni.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-ssub.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-st.ll | 27 + + .../CodeGen/LoongArch/lasx/intrinsic-stelm.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-sub.ll | 62 ++ + .../CodeGen/LoongArch/lasx/intrinsic-subi.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-subw.ll | 194 +++++++ + .../CodeGen/LoongArch/lasx/intrinsic-xor.ll | 14 + + .../CodeGen/LoongArch/lasx/intrinsic-xori.ll | 14 + + 128 files changed, 9154 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-absd.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-add.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-adda.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-addw.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-and.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-andn.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-avg.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-avgr.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitsel.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-clo.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-clz.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-div.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ext2xv.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-exth.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-extl.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fadd.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fclass.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcmp.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvt.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvth.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvtl.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fdiv.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ffint.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-flogb.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmadd.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmax.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmaxa.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmin.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmina.ll + create mode 100644 
llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmsub.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmul.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmadd.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmsub.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecip.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-frint.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrt.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsqrt.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsub.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ftint.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-haddw.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-hsubw.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ilv.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-madd.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-maddw.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-max.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-min.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-mod.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskgez.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskltz.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-msknz.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-msub.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-muh.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-mul.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-mulw.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-neg.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-nor.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-or.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-orn.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pack.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pcnt.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-perm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pick.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-replgr2vr.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve0.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr.ll + create 
mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sadd.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-set.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-setallnez.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-setanyeqz.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-signcov.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sran.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarn.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srln.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrn.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssran.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarn.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrln.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrn.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssub.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-st.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sub.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-subw.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-xor.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori.ll + +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-absd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-absd.ll +new file mode 100644 +index 000000000000..bf54f44357b0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-absd.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvabsd_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; 
CHECK-LABEL: lasx_xvabsd_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvabsd.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvabsd_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvabsd_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvabsd.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvabsd_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvabsd_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvabsd.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvabsd_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvabsd_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvabsd.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvabsd_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvabsd_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvabsd.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvabsd_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvabsd_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvabsd.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvabsd_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvabsd_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvabsd.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvabsd_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvabsd_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvabsd.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-add.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-add.ll +new file mode 100644 +index 000000000000..0c2f2ace29fc +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-add.ll +@@ -0,0 +1,62 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8>, <32 x i8>) ++ ++define <32 x 
i8> @lasx_xvadd_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvadd_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvadd_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvadd_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvadd_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvadd_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvadd_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvadd_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvadd_q(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvadd_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvadd.q $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-adda.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-adda.ll +new file mode 100644 +index 000000000000..c1258d53e913 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-adda.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvadda_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvadda_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvadda.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvadda_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvadda_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvadda.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvadda_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvadda_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvadda.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64>, <4 x 
i64>) ++ ++define <4 x i64> @lasx_xvadda_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvadda_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvadda.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi.ll +new file mode 100644 +index 000000000000..09b5d07a0151 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvaddi_bu(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvaddi_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddi.bu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvaddi_hu(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvaddi_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddi.hu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvaddi_wu(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvaddi_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddi.wu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvaddi_du(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvaddi_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddi.du $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addw.ll +new file mode 100644 +index 000000000000..ef7a1b5a50ef +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addw.ll +@@ -0,0 +1,290 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvaddwev_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwev_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwev.h.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvaddwev_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwev_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwev.w.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32>, <8 x 
i32>) ++ ++define <4 x i64> @lasx_xvaddwev_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwev_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwev.d.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvaddwev_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwev_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwev.q.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvaddwev_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwev_h_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwev.h.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvaddwev_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwev_w_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwev.w.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvaddwev_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwev_d_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwev.d.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvaddwev_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwev_q_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwev.q.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvaddwev_h_bu_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwev_h_bu_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwev.h.bu.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvaddwev_w_hu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwev_w_hu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwev.w.hu.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvaddwev_d_wu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwev_d_wu_w: ++; CHECK: # %bb.0: # 
%entry ++; CHECK-NEXT: xvaddwev.d.wu.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvaddwev_q_du_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwev_q_du_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwev.q.du.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvaddwod_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwod_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwod.h.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvaddwod_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwod_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwod.w.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvaddwod_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwod_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwod.d.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvaddwod_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwod_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwod.q.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvaddwod_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwod_h_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwod.h.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvaddwod_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwod_w_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwod.w.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvaddwod_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwod_d_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwod.d.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> %va, <8 x i32> %vb) 
++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvaddwod_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwod_q_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwod.q.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvaddwod_h_bu_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwod_h_bu_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwod.h.bu.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvaddwod_w_hu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwod_w_hu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwod.w.hu.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvaddwod_d_wu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwod_d_wu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwod.d.wu.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvaddwod_q_du_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwod_q_du_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwod.q.du.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-and.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-and.ll +new file mode 100644 +index 000000000000..15f3a8094770 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-and.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvand_v(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvand_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi.ll +new file mode 100644 +index 000000000000..88cf142d6968 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvandi_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: 
lasx_xvandi_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvandi.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andn.ll +new file mode 100644 +index 000000000000..f385ef3661cb +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andn.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvandn_v(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvandn_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvandn.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avg.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avg.ll +new file mode 100644 +index 000000000000..488d3b96b003 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avg.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvavg_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvavg_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvavg.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvavg_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvavg_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvavg.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvavg_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvavg_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvavg.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvavg_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvavg_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvavg.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvavg_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvavg_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvavg.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvavg_hu(<16 x i16> %va, <16 x i16> %vb) 
nounwind { ++; CHECK-LABEL: lasx_xvavg_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvavg.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvavg_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvavg_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvavg.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvavg_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvavg_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvavg.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avgr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avgr.ll +new file mode 100644 +index 000000000000..b5ab5a5366aa +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avgr.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvavgr_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvavgr_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvavgr.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvavgr_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvavgr_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvavgr.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvavgr_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvavgr_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvavgr.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvavgr_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvavgr_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvavgr.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvavgr_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvavgr_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvavgr.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16>, <16 x i16>) ++ ++define <16 
x i16> @lasx_xvavgr_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvavgr_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvavgr.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvavgr_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvavgr_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvavgr.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvavgr_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvavgr_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvavgr.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr.ll +new file mode 100644 +index 000000000000..cec71bab2fe8 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvbitclr_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvbitclr_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitclr.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvbitclr_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvbitclr_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitclr.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvbitclr_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvbitclr_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitclr.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvbitclr_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvbitclr_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitclr.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbitclri_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvbitclri_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitclri.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ 
++declare <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvbitclri_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvbitclri_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitclri.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvbitclri_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvbitclri_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitclri.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvbitclri_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvbitclri_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitclri.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev.ll +new file mode 100644 +index 000000000000..fb4f9fbc2e4b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvbitrev_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvbitrev_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitrev.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvbitrev_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvbitrev_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitrev.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvbitrev_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvbitrev_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitrev.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvbitrev_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvbitrev_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitrev.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbitrevi_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvbitrevi_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitrevi.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> %va, i32 1) ++ ret 
<32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvbitrevi_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvbitrevi_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitrevi.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvbitrevi_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvbitrevi_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitrevi.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvbitrevi_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvbitrevi_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitrevi.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitsel.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitsel.ll +new file mode 100644 +index 000000000000..2e91407590ac +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitsel.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8>, <32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvbitsel_v(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { ++; CHECK-LABEL: lasx_xvbitsel_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitsel.v $xr0, $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli.ll +new file mode 100644 +index 000000000000..79dd55cbfef9 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbitseli_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvbitseli_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitseli.b $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> %va, <32 x i8> %vb, i32 1) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset.ll +new file mode 100644 +index 000000000000..83d1f0ef60c6 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvbitset_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvbitset_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitset.b $xr0, 
$xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvbitset_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvbitset_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitset.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvbitset_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvbitset_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitset.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvbitset_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvbitset_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitset.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbitseti_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvbitseti_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitseti.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvbitseti_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvbitseti_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitseti.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvbitseti_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvbitseti_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitseti.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvbitseti_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvbitseti_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitseti.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll.ll +new file mode 100644 +index 000000000000..cbb63ced5cc0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbsll_v(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvbsll_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbsll.v $xr0, $xr0, 1 ++; 
CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl.ll +new file mode 100644 +index 000000000000..b0c26cbe3e35 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbsrl_v(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvbsrl_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clo.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clo.ll +new file mode 100644 +index 000000000000..29b2be03d54e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clo.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8>) ++ ++define <32 x i8> @lasx_xvclo_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvclo_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvclo.b $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> %va) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16>) ++ ++define <16 x i16> @lasx_xvclo_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvclo_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvclo.h $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> %va) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32>) ++ ++define <8 x i32> @lasx_xvclo_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvclo_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvclo.w $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64>) ++ ++define <4 x i64> @lasx_xvclo_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvclo_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvclo.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> %va) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clz.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clz.ll +new file mode 100644 +index 000000000000..5247ceedbd14 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clz.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8>) ++ ++define <32 x i8> @lasx_xvclz_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvclz_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvclz.b $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> %va) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16>) ++ ++define <16 x i16> 
@lasx_xvclz_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvclz_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvclz.h $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> %va) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32>) ++ ++define <8 x i32> @lasx_xvclz_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvclz_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvclz.w $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64>) ++ ++define <4 x i64> @lasx_xvclz_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvclz_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvclz.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> %va) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-div.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-div.ll +new file mode 100644 +index 000000000000..813204092e94 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-div.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvdiv_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvdiv_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvdiv.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvdiv_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvdiv_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvdiv.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvdiv_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvdiv_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvdiv.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvdiv_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvdiv_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvdiv.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvdiv_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvdiv_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvdiv.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvdiv_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvdiv_hu: ++; CHECK: # %bb.0: # %entry 
++; CHECK-NEXT: xvdiv.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvdiv_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvdiv_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvdiv.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvdiv_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvdiv_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvdiv.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ext2xv.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ext2xv.ll +new file mode 100644 +index 000000000000..48721b52af00 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ext2xv.ll +@@ -0,0 +1,146 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8>) ++ ++define <16 x i16> @lasx_vext2xv_h_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_vext2xv_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vext2xv.h.b $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> %va) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8>) ++ ++define <8 x i32> @lasx_vext2xv_w_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_vext2xv_w_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vext2xv.w.b $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8>) ++ ++define <4 x i64> @lasx_vext2xv_d_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_vext2xv_d_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vext2xv.d.b $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> %va) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16>) ++ ++define <8 x i32> @lasx_vext2xv_w_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_vext2xv_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vext2xv.w.h $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16>) ++ ++define <4 x i64> @lasx_vext2xv_d_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_vext2xv_d_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vext2xv.d.h $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> %va) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32>) ++ ++define <4 x i64> @lasx_vext2xv_d_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_vext2xv_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vext2xv.d.w $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 
x i32> %va) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8>) ++ ++define <16 x i16> @lasx_vext2xv_hu_bu(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_vext2xv_hu_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vext2xv.hu.bu $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> %va) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8>) ++ ++define <8 x i32> @lasx_vext2xv_wu_bu(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_vext2xv_wu_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vext2xv.wu.bu $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8>) ++ ++define <4 x i64> @lasx_vext2xv_du_bu(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_vext2xv_du_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vext2xv.du.bu $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> %va) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16>) ++ ++define <8 x i32> @lasx_vext2xv_wu_hu(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_vext2xv_wu_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vext2xv.wu.hu $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16>) ++ ++define <4 x i64> @lasx_vext2xv_du_hu(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_vext2xv_du_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vext2xv.du.hu $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> %va) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32>) ++ ++define <4 x i64> @lasx_vext2xv_du_wu(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_vext2xv_du_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vext2xv.du.wu $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> %va) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-exth.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-exth.ll +new file mode 100644 +index 000000000000..543589e61b12 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-exth.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8>) ++ ++define <16 x i16> @lasx_xvexth_h_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvexth_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvexth.h.b $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> %va) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16>) ++ ++define <8 x i32> @lasx_xvexth_w_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvexth_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvexth.w.h $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32>) ++ ++define <4 x i64> @lasx_xvexth_d_w(<8 x i32> %va) 
nounwind { ++; CHECK-LABEL: lasx_xvexth_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvexth.d.w $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> %va) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64>) ++ ++define <4 x i64> @lasx_xvexth_q_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvexth_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvexth.q.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> %va) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8>) ++ ++define <16 x i16> @lasx_xvexth_hu_bu(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvexth_hu_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvexth.hu.bu $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> %va) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16>) ++ ++define <8 x i32> @lasx_xvexth_wu_hu(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvexth_wu_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvexth.wu.hu $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32>) ++ ++define <4 x i64> @lasx_xvexth_du_wu(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvexth_du_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvexth.du.wu $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> %va) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64>) ++ ++define <4 x i64> @lasx_xvexth_qu_du(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvexth_qu_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvexth.qu.du $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> %va) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extl.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extl.ll +new file mode 100644 +index 000000000000..7040c8c784cd +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extl.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64>) ++ ++define <4 x i64> @lasx_xvextl_q_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvextl_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvextl.q.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> %va) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64>) ++ ++define <4 x i64> @lasx_xvextl_qu_du(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvextl_qu_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvextl.qu.du $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> %va) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins.ll +new file mode 100644 +index 000000000000..c8774a7b29c0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvextrins_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvextrins_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvextrins.b $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> %va, <32 x i8> %vb, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvextrins_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvextrins_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvextrins.h $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> %va, <16 x i16> %vb, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvextrins_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvextrins_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvextrins.w $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> %va, <8 x i32> %vb, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvextrins_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvextrins_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvextrins.d $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> %va, <4 x i64> %vb, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fadd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fadd.ll +new file mode 100644 +index 000000000000..563a0ce9e384 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fadd.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float>, <8 x float>) ++ ++define <8 x float> @lasx_xvfadd_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfadd_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfadd.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double>, <4 x double>) ++ ++define <4 x double> @lasx_xvfadd_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfadd_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfadd.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fclass.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fclass.ll +new file mode 100644 +index 000000000000..901ca5bb0260 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fclass.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float>) ++ ++define <8 x 
i32> @lasx_xvfclass_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvfclass_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfclass.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double>) ++ ++define <4 x i64> @lasx_xvfclass_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvfclass_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfclass.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> %va) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcmp.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcmp.ll +new file mode 100644 +index 000000000000..b01f908e71af +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcmp.ll +@@ -0,0 +1,530 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_caf_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_caf_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.caf.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_caf_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_caf_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.caf.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_cun_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_cun_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.cun.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_cun_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_cun_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.cun.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_ceq_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_ceq_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.ceq.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_ceq_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_ceq_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.ceq.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> 
@llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_cueq_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_cueq_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.cueq.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_cueq_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_cueq_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.cueq.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_clt_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_clt_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_clt_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_clt_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_cult_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_cult_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.cult.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_cult_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_cult_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.cult.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_cle_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_cle_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.cle.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_cle_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_cle_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.cle.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> 
%res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_cule_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_cule_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.cule.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_cule_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_cule_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.cule.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_cne_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_cne_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.cne.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_cne_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_cne_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.cne.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_cor_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_cor_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.cor.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_cor_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_cor_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.cor.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_cune_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_cune_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.cune.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_cune_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_cune_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.cune.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float>, <8 x 
float>) ++ ++define <8 x i32> @lasx_xvfcmp_saf_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_saf_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.saf.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_saf_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_saf_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.saf.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_sun_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_sun_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.sun.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_sun_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_sun_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.sun.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_seq_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_seq_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.seq.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_seq_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_seq_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.seq.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_sueq_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_sueq_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.sueq.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_sueq_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_sueq_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.sueq.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_slt_s(<8 x float> %va, <8 x float> %vb) nounwind { 
++; CHECK-LABEL: lasx_xvfcmp_slt_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.slt.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_slt_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_slt_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.slt.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_sult_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_sult_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.sult.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_sult_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_sult_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.sult.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_sle_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_sle_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.sle.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_sle_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_sle_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.sle.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_sule_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_sule_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.sule.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_sule_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_sule_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.sule.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_sne_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_sne_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: 
xvfcmp.sne.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_sne_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_sne_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.sne.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_sor_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_sor_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.sor.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_sor_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_sor_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.sor.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_sune_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_sune_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.sune.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_sune_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_sune_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.sune.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvt.ll +new file mode 100644 +index 000000000000..82bf1d3df72c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvt.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float>, <8 x float>) ++ ++define <16 x i16> @lasx_xvfcvt_h_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcvt_h_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcvt.h.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> %va, <8 x float> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double>, <4 x double>) ++ ++define <8 x float> @lasx_xvfcvt_s_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcvt_s_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcvt.s.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x 
double> %va, <4 x double> %vb) ++ ret <8 x float> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvth.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvth.ll +new file mode 100644 +index 000000000000..e1a6a2923e67 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvth.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16>) ++ ++define <8 x float> @lasx_xvfcvth_s_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvfcvth_s_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcvth.s.h $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> %va) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float>) ++ ++define <4 x double> @lasx_xvfcvth_d_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvfcvth_d_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcvth.d.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> %va) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvtl.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvtl.ll +new file mode 100644 +index 000000000000..0b3e693c7f51 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvtl.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16>) ++ ++define <8 x float> @lasx_xvfcvtl_s_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvfcvtl_s_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcvtl.s.h $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> %va) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float>) ++ ++define <4 x double> @lasx_xvfcvtl_d_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvfcvtl_d_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcvtl.d.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> %va) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fdiv.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fdiv.ll +new file mode 100644 +index 000000000000..49923ddd4e8d +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fdiv.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float>, <8 x float>) ++ ++define <8 x float> @lasx_xvfdiv_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfdiv_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfdiv.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double>, <4 x double>) ++ ++define <4 x double> @lasx_xvfdiv_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfdiv_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfdiv.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call 
<4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ffint.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ffint.ll +new file mode 100644 +index 000000000000..24da0bd33838 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ffint.ll +@@ -0,0 +1,86 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32>) ++ ++define <8 x float> @lasx_xvffint_s_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvffint_s_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvffint.s.w $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> %va) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64>) ++ ++define <4 x double> @lasx_xvffint_d_l(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvffint_d_l: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvffint.d.l $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> %va) ++ ret <4 x double> %res ++} ++ ++declare <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32>) ++ ++define <8 x float> @lasx_xvffint_s_wu(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvffint_s_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvffint.s.wu $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> %va) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64>) ++ ++define <4 x double> @lasx_xvffint_d_lu(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvffint_d_lu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvffint.d.lu $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> %va) ++ ret <4 x double> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32>) ++ ++define <4 x double> @lasx_xvffintl_d_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvffintl_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvffintl.d.w $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> %va) ++ ret <4 x double> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32>) ++ ++define <4 x double> @lasx_xvffinth_d_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvffinth_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvffinth.d.w $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> %va) ++ ret <4 x double> %res ++} ++ ++declare <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64>, <4 x i64>) ++ ++define <8 x float> @lasx_xvffint_s_l(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvffint_s_l: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvffint.s.l $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> %va, <4 x i64> %vb) ++ ret <8 x float> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-flogb.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-flogb.ll +new file mode 100644 +index 000000000000..bccef4504d70 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-flogb.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float>) ++ ++define <8 x float> @lasx_xvflogb_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvflogb_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvflogb.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> %va) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double>) ++ ++define <4 x double> @lasx_xvflogb_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvflogb_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvflogb.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> %va) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmadd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmadd.ll +new file mode 100644 +index 000000000000..0fc06f971660 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmadd.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float>, <8 x float>, <8 x float>) ++ ++define <8 x float> @lasx_xvfmadd_s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) nounwind { ++; CHECK-LABEL: lasx_xvfmadd_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double>, <4 x double>, <4 x double>) ++ ++define <4 x double> @lasx_xvfmadd_d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) nounwind { ++; CHECK-LABEL: lasx_xvfmadd_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmax.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmax.ll +new file mode 100644 +index 000000000000..2422fa0c00d8 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmax.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float>, <8 x float>) ++ ++define <8 x float> @lasx_xvfmax_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfmax_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfmax.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double>, <4 x double>) ++ ++define <4 x double> @lasx_xvfmax_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfmax_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfmax.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmaxa.ll 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmaxa.ll +new file mode 100644 +index 000000000000..cd9ccc656aef +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmaxa.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float>, <8 x float>) ++ ++define <8 x float> @lasx_xvfmaxa_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfmaxa_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfmaxa.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double>, <4 x double>) ++ ++define <4 x double> @lasx_xvfmaxa_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfmaxa_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfmaxa.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmin.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmin.ll +new file mode 100644 +index 000000000000..effb3f9e1d75 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmin.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float>, <8 x float>) ++ ++define <8 x float> @lasx_xvfmin_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfmin_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfmin.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double>, <4 x double>) ++ ++define <4 x double> @lasx_xvfmin_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfmin_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfmin.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmina.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmina.ll +new file mode 100644 +index 000000000000..753a6f31ba06 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmina.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float>, <8 x float>) ++ ++define <8 x float> @lasx_xvfmina_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfmina_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfmina.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double>, <4 x double>) ++ ++define <4 x double> @lasx_xvfmina_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfmina_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: 
xvfmina.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmsub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmsub.ll +new file mode 100644 +index 000000000000..57909d0dd168 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmsub.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float>, <8 x float>, <8 x float>) ++ ++define <8 x float> @lasx_xvfmsub_s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) nounwind { ++; CHECK-LABEL: lasx_xvfmsub_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double>, <4 x double>, <4 x double>) ++ ++define <4 x double> @lasx_xvfmsub_d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) nounwind { ++; CHECK-LABEL: lasx_xvfmsub_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmul.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmul.ll +new file mode 100644 +index 000000000000..9cad6f383066 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmul.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float>, <8 x float>) ++ ++define <8 x float> @lasx_xvfmul_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfmul_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfmul.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double>, <4 x double>) ++ ++define <4 x double> @lasx_xvfmul_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfmul_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfmul.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmadd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmadd.ll +new file mode 100644 +index 000000000000..c30993590f98 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmadd.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float>, <8 x float>, <8 x float>) ++ ++define <8 x float> @lasx_xvfnmadd_s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) nounwind { ++; CHECK-LABEL: lasx_xvfnmadd_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 ++; 
CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double>, <4 x double>, <4 x double>) ++ ++define <4 x double> @lasx_xvfnmadd_d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) nounwind { ++; CHECK-LABEL: lasx_xvfnmadd_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmsub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmsub.ll +new file mode 100644 +index 000000000000..2e7ca695be62 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmsub.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float>, <8 x float>, <8 x float>) ++ ++define <8 x float> @lasx_xvfnmsub_s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) nounwind { ++; CHECK-LABEL: lasx_xvfnmsub_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double>, <4 x double>, <4 x double>) ++ ++define <4 x double> @lasx_xvfnmsub_d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) nounwind { ++; CHECK-LABEL: lasx_xvfnmsub_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecip.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecip.ll +new file mode 100644 +index 000000000000..da3a26df2824 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecip.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float>) ++ ++define <8 x float> @lasx_xvfrecip_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvfrecip_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrecip.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> %va) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double>) ++ ++define <4 x double> @lasx_xvfrecip_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvfrecip_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrecip.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> %va) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frint.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frint.ll +new file mode 100644 +index 000000000000..ddead27cd14b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frint.ll +@@ -0,0 +1,122 @@ ++; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float>) ++ ++define <8 x float> @lasx_xvfrintrne_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvfrintrne_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrintrne.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> %va) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double>) ++ ++define <4 x double> @lasx_xvfrintrne_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvfrintrne_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrintrne.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> %va) ++ ret <4 x double> %res ++} ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float>) ++ ++define <8 x float> @lasx_xvfrintrz_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvfrintrz_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrintrz.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> %va) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double>) ++ ++define <4 x double> @lasx_xvfrintrz_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvfrintrz_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrintrz.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> %va) ++ ret <4 x double> %res ++} ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float>) ++ ++define <8 x float> @lasx_xvfrintrp_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvfrintrp_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrintrp.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> %va) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double>) ++ ++define <4 x double> @lasx_xvfrintrp_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvfrintrp_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrintrp.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> %va) ++ ret <4 x double> %res ++} ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float>) ++ ++define <8 x float> @lasx_xvfrintrm_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvfrintrm_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrintrm.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> %va) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double>) ++ ++define <4 x double> @lasx_xvfrintrm_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvfrintrm_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrintrm.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> %va) ++ ret <4 x double> %res ++} ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float>) ++ ++define <8 x float> @lasx_xvfrint_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvfrint_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrint.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> %va) ++ ret <8 x float> %res ++} ++ ++declare <4 x 
double> @llvm.loongarch.lasx.xvfrint.d(<4 x double>) ++ ++define <4 x double> @lasx_xvfrint_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvfrint_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrint.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> %va) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrt.ll +new file mode 100644 +index 000000000000..6efa8122baf1 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrt.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float>) ++ ++define <8 x float> @lasx_xvfrsqrt_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvfrsqrt_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrsqrt.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> %va) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double>) ++ ++define <4 x double> @lasx_xvfrsqrt_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvfrsqrt_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrsqrt.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> %va) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp.ll +new file mode 100644 +index 000000000000..e83e55a52a11 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8>, <32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvfrstp_b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { ++; CHECK-LABEL: lasx_xvfrstp_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrstp.b $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16>, <16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvfrstp_h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { ++; CHECK-LABEL: lasx_xvfrstp_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrstp.h $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) ++ ret <16 x i16> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvfrstpi_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfrstpi_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrstpi.b $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> %va, <32 x i8> %vb, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvfrstpi_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfrstpi_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrstpi.h $xr0, $xr1, 1 ++; CHECK-NEXT: ret 
++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> %va, <16 x i16> %vb, i32 1) ++ ret <16 x i16> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsqrt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsqrt.ll +new file mode 100644 +index 000000000000..a13333d8d81c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsqrt.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float>) ++ ++define <8 x float> @lasx_xvfsqrt_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvfsqrt_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfsqrt.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> %va) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double>) ++ ++define <4 x double> @lasx_xvfsqrt_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvfsqrt_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfsqrt.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> %va) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsub.ll +new file mode 100644 +index 000000000000..b52774a03618 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsub.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float>, <8 x float>) ++ ++define <8 x float> @lasx_xvfsub_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfsub_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfsub.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double>, <4 x double>) ++ ++define <4 x double> @lasx_xvfsub_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfsub_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfsub.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ftint.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ftint.ll +new file mode 100644 +index 000000000000..74cd507f16d2 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ftint.ll +@@ -0,0 +1,350 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float>) ++ ++define <8 x i32> @lasx_xvftintrne_w_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintrne_w_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrne.w.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double>) ++ ++define <4 x i64> @lasx_xvftintrne_l_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintrne_l_d: ++; CHECK: # %bb.0: 
# %entry ++; CHECK-NEXT: xvftintrne.l.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> %va) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float>) ++ ++define <8 x i32> @lasx_xvftintrz_w_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintrz_w_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrz.w.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double>) ++ ++define <4 x i64> @lasx_xvftintrz_l_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintrz_l_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrz.l.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> %va) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float>) ++ ++define <8 x i32> @lasx_xvftintrp_w_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintrp_w_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrp.w.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double>) ++ ++define <4 x i64> @lasx_xvftintrp_l_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintrp_l_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrp.l.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> %va) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float>) ++ ++define <8 x i32> @lasx_xvftintrm_w_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintrm_w_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrm.w.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double>) ++ ++define <4 x i64> @lasx_xvftintrm_l_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintrm_l_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrm.l.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> %va) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float>) ++ ++define <8 x i32> @lasx_xvftint_w_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvftint_w_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftint.w.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double>) ++ ++define <4 x i64> @lasx_xvftint_l_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvftint_l_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftint.l.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> %va) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float>) ++ ++define <8 x i32> @lasx_xvftintrz_wu_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintrz_wu_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrz.wu.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> 
@llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double>) ++ ++define <4 x i64> @lasx_xvftintrz_lu_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintrz_lu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrz.lu.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> %va) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float>) ++ ++define <8 x i32> @lasx_xvftint_wu_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvftint_wu_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftint.wu.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double>) ++ ++define <4 x i64> @lasx_xvftint_lu_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvftint_lu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftint.lu.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> %va) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double>, <4 x double>) ++ ++define <8 x i32> @lasx_xvftintrne_w_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvftintrne_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrne.w.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> %va, <4 x double> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double>, <4 x double>) ++ ++define <8 x i32> @lasx_xvftintrz_w_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvftintrz_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrz.w.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> %va, <4 x double> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double>, <4 x double>) ++ ++define <8 x i32> @lasx_xvftintrp_w_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvftintrp_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrp.w.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> %va, <4 x double> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double>, <4 x double>) ++ ++define <8 x i32> @lasx_xvftintrm_w_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvftintrm_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrm.w.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> %va, <4 x double> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double>, <4 x double>) ++ ++define <8 x i32> @lasx_xvftint_w_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvftint_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftint.w.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> %va, <4 x double> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float>) ++ ++define <4 x i64> @lasx_xvftintrnel_l_s(<8 x float> %va) nounwind { 
++; CHECK-LABEL: lasx_xvftintrnel_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrnel.l.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> %va) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float>) ++ ++define <4 x i64> @lasx_xvftintrneh_l_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintrneh_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrneh.l.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> %va) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float>) ++ ++define <4 x i64> @lasx_xvftintrzl_l_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintrzl_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrzl.l.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> %va) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float>) ++ ++define <4 x i64> @lasx_xvftintrzh_l_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintrzh_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrzh.l.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> %va) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float>) ++ ++define <4 x i64> @lasx_xvftintrpl_l_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintrpl_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrpl.l.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> %va) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float>) ++ ++define <4 x i64> @lasx_xvftintrph_l_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintrph_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrph.l.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> %va) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float>) ++ ++define <4 x i64> @lasx_xvftintrml_l_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintrml_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrml.l.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> %va) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float>) ++ ++define <4 x i64> @lasx_xvftintrmh_l_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintrmh_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrmh.l.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> %va) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float>) ++ ++define <4 x i64> @lasx_xvftintl_l_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintl_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintl.l.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> %va) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float>) ++ ++define <4 x i64> @lasx_xvftinth_l_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvftinth_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftinth.l.s 
$xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> %va) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-haddw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-haddw.ll +new file mode 100644 +index 000000000000..2c64ab23806b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-haddw.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvhaddw_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvhaddw_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhaddw.h.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvhaddw_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvhaddw_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhaddw.w.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvhaddw_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvhaddw_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhaddw.d.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvhaddw_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvhaddw_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhaddw.q.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvhaddw_hu_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvhaddw_hu_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhaddw.hu.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvhaddw_wu_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvhaddw_wu_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhaddw.wu.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvhaddw_du_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvhaddw_du_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhaddw.du.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64>, 
<4 x i64>) ++ ++define <4 x i64> @lasx_xvhaddw_qu_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvhaddw_qu_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhaddw.qu.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-hsubw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-hsubw.ll +new file mode 100644 +index 000000000000..a5223c1d89a0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-hsubw.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvhsubw_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvhsubw_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhsubw.h.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvhsubw_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvhsubw_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhsubw.w.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvhsubw_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvhsubw_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhsubw.d.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvhsubw_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvhsubw_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhsubw.q.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvhsubw_hu_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvhsubw_hu_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhsubw.hu.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvhsubw_wu_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvhsubw_wu_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhsubw.wu.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvhsubw_du_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvhsubw_du_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhsubw.du.wu $xr0, $xr0, $xr1 ++; 
CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvhsubw_qu_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvhsubw_qu_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhsubw.qu.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ilv.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ilv.ll +new file mode 100644 +index 000000000000..c9d0ca6b0324 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ilv.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvilvl_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvilvl_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvilvl.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvilvl_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvilvl_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvilvl.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvilvl_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvilvl_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvilvl.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvilvl_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvilvl_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvilvl.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvilvh_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvilvh_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvilvh.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvilvh_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvilvh_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvilvh.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvilvh_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvilvh_w: 
++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvilvh.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvilvh_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvilvh_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvilvh.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr.ll +new file mode 100644 +index 000000000000..ea98c96464ae +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr.ll +@@ -0,0 +1,28 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32>, i32, i32) ++ ++define <8 x i32> @lasx_xvinsgr2vr_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvinsgr2vr_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: ori $a0, $zero, 1 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> %va, i32 1, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64>, i64, i32) ++ ++define <4 x i64> @lasx_xvinsgr2vr_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvinsgr2vr_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: ori $a0, $zero, 1 ++; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> %va, i64 1, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0.ll +new file mode 100644 +index 000000000000..27ae819c4144 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvinsve0_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvinsve0_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvinsve0.w $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> %va, <8 x i32> %vb, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvinsve0_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvinsve0_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvinsve0.d $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> %va, <4 x i64> %vb, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld.ll +new file mode 100644 +index 000000000000..5ffc629db466 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ 
++declare <32 x i8> @llvm.loongarch.lasx.xvld(i8*, i32) ++ ++define <32 x i8> @lasx_xvld(i8* %p) nounwind { ++; CHECK-LABEL: lasx_xvld: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvld(i8* %p, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvldx(i8*, i64) ++ ++define <32 x i8> @lasx_xvldx(i8* %p, i64 %b) nounwind { ++; CHECK-LABEL: lasx_xvldx: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvldx $xr0, $a0, $a1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvldx(i8* %p, i64 %b) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi.ll +new file mode 100644 +index 000000000000..59f79dd32af3 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi.ll +@@ -0,0 +1,62 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvldi(i32) ++ ++define <4 x i64> @lasx_xvldi() nounwind { ++; CHECK-LABEL: lasx_xvldi: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvldi $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32) ++ ++define <32 x i8> @lasx_xvrepli_b() nounwind { ++; CHECK-LABEL: lasx_xvrepli_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrepli.b $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32) ++ ++define <16 x i16> @lasx_xvrepli_h() nounwind { ++; CHECK-LABEL: lasx_xvrepli_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrepli.h $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32) ++ ++define <8 x i32> @lasx_xvrepli_w() nounwind { ++; CHECK-LABEL: lasx_xvrepli_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrepli.w $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32) ++ ++define <4 x i64> @lasx_xvrepli_d() nounwind { ++; CHECK-LABEL: lasx_xvrepli_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrepli.d $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl.ll +new file mode 100644 +index 000000000000..ae6abdf81cbc +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8*, i32) ++ ++define <32 x i8> @lasx_xvldrepl_b(i8* %p) nounwind { ++; CHECK-LABEL: lasx_xvldrepl_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvldrepl.b $xr0, $a0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8* %p, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8*, i32) ++ ++define <16 x i16> 
@lasx_xvldrepl_h(i8* %p) nounwind { ++; CHECK-LABEL: lasx_xvldrepl_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvldrepl.h $xr0, $a0, 2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8* %p, i32 2) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8*, i32) ++ ++define <8 x i32> @lasx_xvldrepl_w(i8* %p) nounwind { ++; CHECK-LABEL: lasx_xvldrepl_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvldrepl.w $xr0, $a0, 4 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8* %p, i32 4) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8*, i32) ++ ++define <4 x i64> @lasx_xvldrepl_d(i8* %p) nounwind { ++; CHECK-LABEL: lasx_xvldrepl_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvldrepl.d $xr0, $a0, 8 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8* %p, i32 8) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-madd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-madd.ll +new file mode 100644 +index 000000000000..d3b09396727e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-madd.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8>, <32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvmadd_b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmadd_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmadd.b $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16>, <16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvmadd_h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmadd_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmadd.h $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32>, <8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvmadd_w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmadd_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmadd.w $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64>, <4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmadd_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmadd_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmadd.d $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-maddw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-maddw.ll +new file mode 100644 +index 000000000000..146624a764a2 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-maddw.ll +@@ -0,0 +1,290 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | 
FileCheck %s ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16>, <32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvmaddwev_h_b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwev_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwev.h.b $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32>, <16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvmaddwev_w_h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwev_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwev.w.h $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64>, <8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvmaddwev_d_w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwev_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwev.d.w $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64>, <4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmaddwev_q_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwev_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwev.q.d $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16>, <32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvmaddwev_h_bu(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwev_h_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwev.h.bu $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32>, <16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvmaddwev_w_hu(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwev_w_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwev.w.hu $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64>, <8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvmaddwev_d_wu(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwev_d_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwev.d.wu $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64>, <4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmaddwev_q_du(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwev_q_du: ++; CHECK: # %bb.0: # %entry ++; 
CHECK-NEXT: xvmaddwev.q.du $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16>, <32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvmaddwev_h_bu_b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwev_h_bu_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwev.h.bu.b $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32>, <16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvmaddwev_w_hu_h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwev_w_hu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwev.w.hu.h $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64>, <8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvmaddwev_d_wu_w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwev_d_wu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwev.d.wu.w $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64>, <4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmaddwev_q_du_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwev_q_du_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwev.q.du.d $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16>, <32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvmaddwod_h_b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwod_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwod.h.b $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32>, <16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvmaddwod_w_h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwod_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwod.w.h $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64>, <8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvmaddwod_d_w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwod_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwod.d.w $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> 
@llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64>, <4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmaddwod_q_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwod_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwod.q.d $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16>, <32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvmaddwod_h_bu(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwod_h_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwod.h.bu $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32>, <16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvmaddwod_w_hu(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwod_w_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwod.w.hu $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64>, <8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvmaddwod_d_wu(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwod_d_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwod.d.wu $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64>, <4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmaddwod_q_du(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwod_q_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwod.q.du $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16>, <32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvmaddwod_h_bu_b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwod_h_bu_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwod.h.bu.b $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32>, <16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvmaddwod_w_hu_h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwod_w_hu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwod.w.hu.h $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64>, <8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvmaddwod_d_wu_w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwod_d_wu_w: ++; CHECK: # %bb.0: # %entry ++; 
CHECK-NEXT: xvmaddwod.d.wu.w $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64>, <4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmaddwod_q_du_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwod_q_du_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwod.q.du.d $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max.ll +new file mode 100644 +index 000000000000..9cf09df4439a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max.ll +@@ -0,0 +1,194 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvmax_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmax_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmax.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvmax_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmax_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmax.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvmax_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmax_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmax.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmax_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmax_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmax.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvmaxi_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvmaxi_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaxi.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvmaxi_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvmaxi_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaxi.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvmaxi_w(<8 x i32> %va) nounwind { ++; 
CHECK-LABEL: lasx_xvmaxi_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaxi.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvmaxi_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvmaxi_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaxi.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvmax_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmax_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmax.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvmax_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmax_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmax.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvmax_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmax_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmax.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmax_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmax_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmax.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvmaxi_bu(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvmaxi_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaxi.bu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvmaxi_hu(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvmaxi_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaxi.hu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvmaxi_wu(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvmaxi_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaxi.wu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvmaxi_du(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvmaxi_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaxi.du $xr0, $xr0, 1 ++; CHECK-NEXT: ret
++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min.ll +new file mode 100644 +index 000000000000..c94b1e4ea44c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min.ll +@@ -0,0 +1,194 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvmin_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmin_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmin.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvmin_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmin_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmin.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvmin_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmin_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmin.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmin_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmin_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmin.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvmini_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvmini_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmini.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvmini_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvmini_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmini.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvmini_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvmini_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmini.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvmini_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvmini_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmini.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> 
%res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvmin_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmin_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmin.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvmin_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmin_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmin.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvmin_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmin_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmin.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmin_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmin_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmin.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvmini_bu(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvmini_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmini.bu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvmini_hu(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvmini_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmini.hu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvmini_wu(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvmini_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmini.wu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvmini_du(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvmini_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmini.du $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mod.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mod.ll +new file mode 100644 +index 000000000000..a177246bb235 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mod.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x 
i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvmod_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmod_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmod.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvmod_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmod_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmod.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvmod_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmod_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmod.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmod_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmod_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmod.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvmod_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmod_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmod.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvmod_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmod_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmod.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvmod_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmod_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmod.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmod_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmod_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmod.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskgez.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskgez.ll +new file mode 100644 +index 000000000000..da87c20ad6ee +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskgez.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> 
@llvm.loongarch.lasx.xvmskgez.b(<32 x i8>) ++ ++define <32 x i8> @lasx_xvmskgez_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvmskgez_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmskgez.b $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> %va) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskltz.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskltz.ll +new file mode 100644 +index 000000000000..b2218487535c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskltz.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8>) ++ ++define <32 x i8> @lasx_xvmskltz_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvmskltz_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmskltz.b $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> %va) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16>) ++ ++define <16 x i16> @lasx_xvmskltz_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvmskltz_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmskltz.h $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> %va) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32>) ++ ++define <8 x i32> @lasx_xvmskltz_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvmskltz_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmskltz.w $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64>) ++ ++define <4 x i64> @lasx_xvmskltz_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvmskltz_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmskltz.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> %va) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msknz.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msknz.ll +new file mode 100644 +index 000000000000..becd2c883a7e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msknz.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8>) ++ ++define <32 x i8> @lasx_xvmsknz_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvmsknz_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmsknz.b $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> %va) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msub.ll +new file mode 100644 +index 000000000000..c89f9578b77d +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msub.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8>, <32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvmsub_b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) 
nounwind { ++; CHECK-LABEL: lasx_xvmsub_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmsub.b $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16>, <16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvmsub_h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmsub_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmsub.h $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32>, <8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvmsub_w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmsub_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmsub.w $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64>, <4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmsub_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmsub_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmsub.d $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-muh.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-muh.ll +new file mode 100644 +index 000000000000..97461512ce16 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-muh.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvmuh_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmuh_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmuh.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvmuh_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmuh_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmuh.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvmuh_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmuh_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmuh.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmuh_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmuh_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmuh.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> %va, <4 x i64> %vb) ++ 
ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvmuh_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmuh_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmuh.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvmuh_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmuh_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmuh.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvmuh_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmuh_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmuh.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmuh_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmuh_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmuh.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mul.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mul.ll +new file mode 100644 +index 000000000000..d5d852e58a9f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mul.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvmul_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmul_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmul.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvmul_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmul_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmul.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvmul_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmul_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmul.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmul_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmul_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmul.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x 
i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mulw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mulw.ll +new file mode 100644 +index 000000000000..f69e64aa7698 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mulw.ll +@@ -0,0 +1,290 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvmulwev_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwev_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwev.h.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvmulwev_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwev_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwev.w.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvmulwev_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwev_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwev.d.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmulwev_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwev_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwev.q.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvmulwev_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwev_h_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwev.h.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvmulwev_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwev_w_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwev.w.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvmulwev_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwev_d_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwev.d.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmulwev_q_du(<4 x i64> %va, <4 x 
i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwev_q_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwev.q.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvmulwev_h_bu_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwev_h_bu_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwev.h.bu.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvmulwev_w_hu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwev_w_hu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwev.w.hu.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvmulwev_d_wu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwev_d_wu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwev.d.wu.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmulwev_q_du_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwev_q_du_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwev.q.du.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvmulwod_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwod_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwod.h.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvmulwod_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwod_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwod.w.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvmulwod_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwod_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwod.d.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmulwod_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwod_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwod.q.d $xr0, $xr0, $xr1 ++; 
CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvmulwod_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwod_h_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwod.h.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvmulwod_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwod_w_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwod.w.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvmulwod_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwod_d_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwod.d.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmulwod_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwod_q_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwod.q.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvmulwod_h_bu_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwod_h_bu_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwod.h.bu.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvmulwod_w_hu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwod_w_hu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwod.w.hu.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvmulwod_d_wu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwod_d_wu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwod.d.wu.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmulwod_q_du_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwod_q_du_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwod.q.du.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x 
i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-neg.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-neg.ll +new file mode 100644 +index 000000000000..ecbedf334657 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-neg.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8>) ++ ++define <32 x i8> @lasx_xvneg_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvneg_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvneg.b $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> %va) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16>) ++ ++define <16 x i16> @lasx_xvneg_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvneg_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvneg.h $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> %va) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32>) ++ ++define <8 x i32> @lasx_xvneg_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvneg_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvneg.w $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64>) ++ ++define <4 x i64> @lasx_xvneg_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvneg_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvneg.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> %va) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nor.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nor.ll +new file mode 100644 +index 000000000000..674746b7624e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nor.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvnor_v(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvnor_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori.ll +new file mode 100644 +index 000000000000..55eebf87ee92 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvnori_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvnori_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvnori.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-or.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-or.ll +new file mode 100644 +index 
000000000000..16462cfafc54 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-or.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvor_v(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvor_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori.ll +new file mode 100644 +index 000000000000..8e53d88bac37 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvori_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvori_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvori.b $xr0, $xr0, 3 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> %va, i32 3) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-orn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-orn.ll +new file mode 100644 +index 000000000000..3a335cdd3716 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-orn.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvorn_v(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvorn_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvorn.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pack.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pack.ll +new file mode 100644 +index 000000000000..512b30234917 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pack.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvpackev_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpackev_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpackev.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvpackev_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpackev_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpackev.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32>, <8 x i32>) ++ ++define <8 x 
i32> @lasx_xvpackev_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpackev_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpackev.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvpackev_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpackev_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpackev.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvpackod_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpackod_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpackod.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvpackod_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpackod_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpackod.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvpackod_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpackod_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpackod.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvpackod_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpackod_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpackod.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pcnt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pcnt.ll +new file mode 100644 +index 000000000000..d77f1d2082c8 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pcnt.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8>) ++ ++define <32 x i8> @lasx_xvpcnt_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvpcnt_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpcnt.b $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> %va) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16>) ++ ++define <16 x i16> @lasx_xvpcnt_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvpcnt_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpcnt.h $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> %va) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32>) ++ ++define <8 x 
i32> @lasx_xvpcnt_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvpcnt_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpcnt.w $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64>) ++ ++define <4 x i64> @lasx_xvpcnt_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvpcnt_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpcnt.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> %va) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-perm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-perm.ll +new file mode 100644 +index 000000000000..4ec434edd4ec +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-perm.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvperm_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvperm_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvperm.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi.ll +new file mode 100644 +index 000000000000..0d9f9daabc44 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi.ll +@@ -0,0 +1,38 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvpermi_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpermi_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpermi.w $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> %va, <8 x i32> %vb, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvpermi_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvpermi_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpermi.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvpermi_q(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpermi_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> %va, <32 x i8> %vb, i32 1) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pick.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pick.ll +new file mode 100644 +index 000000000000..bbd6d693ca0b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pick.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> 
@lasx_xvpickev_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpickev_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickev.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvpickev_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpickev_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickev.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvpickev_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpickev_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickev.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvpickev_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpickev_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickev.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvpickod_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpickod_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickod.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvpickod_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpickod_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickod.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvpickod_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpickod_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickod.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvpickod_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpickod_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickod.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve.ll +new file mode 100644 +index 000000000000..546777bc72ab +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 
--mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvpickve_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvpickve_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickve.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvpickve_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvpickve_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickve.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} ++ ++declare <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float>, i32) ++ ++define <8 x float> @lasx_xvpickve_w_f(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvpickve_w_f: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickve.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> %va, i32 1) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double>, i32) ++ ++define <4 x double> @lasx_xvpickve_d_f(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvpickve_d_f: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickve.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> %va, i32 1) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr.ll +new file mode 100644 +index 000000000000..0617e7424321 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr.ll +@@ -0,0 +1,53 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++ ++ ++ ++declare i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32>, i32) ++ ++define i32 @lasx_xvpickve2gr_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvpickve2gr_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> %va, i32 1) ++ ret i32 %res ++} ++ ++declare i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64>, i32) ++ ++define i64 @lasx_xvpickve2gr_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvpickve2gr_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> %va, i32 1) ++ ret i64 %res ++} ++ ++declare i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32>, i32) ++ ++define i32 @lasx_xvpickve2gr_wu(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvpickve2gr_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> %va, i32 1) ++ ret i32 %res ++} ++ ++declare i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64>, i32) ++ ++define i64 @lasx_xvpickve2gr_du(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvpickve2gr_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickve2gr.du $a0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> %va, i32 1) ++ ret i64 %res ++} +diff --git 
a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei.ll +new file mode 100644 +index 000000000000..25fab44f461f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvrepl128vei_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvrepl128vei_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrepl128vei.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvrepl128vei_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvrepl128vei_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrepl128vei.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvrepl128vei_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvrepl128vei_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrepl128vei.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvrepl128vei_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvrepl128vei_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrepl128vei.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replgr2vr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replgr2vr.ll +new file mode 100644 +index 000000000000..c71abd2205c6 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replgr2vr.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32) ++ ++define <32 x i8> @lasx_xvreplgr2vr_b(i32 %a) nounwind { ++; CHECK-LABEL: lasx_xvreplgr2vr_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvreplgr2vr.b $xr0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 %a) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32) ++ ++define <16 x i16> @lasx_xvreplgr2vr_h(i32 %a) nounwind { ++; CHECK-LABEL: lasx_xvreplgr2vr_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvreplgr2vr.h $xr0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 %a) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32) ++ ++define <8 x i32> @lasx_xvreplgr2vr_w(i32 %a) nounwind { ++; CHECK-LABEL: lasx_xvreplgr2vr_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvreplgr2vr.w $xr0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 %a) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> 
@llvm.loongarch.lasx.xvreplgr2vr.d(i64) ++ ++define <4 x i64> @lasx_xvreplgr2vr_d(i64 %a) nounwind { ++; CHECK-LABEL: lasx_xvreplgr2vr_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvreplgr2vr.d $xr0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 %a) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve.ll +new file mode 100644 +index 000000000000..21d36ff7bb5e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvreplve_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK-LABEL: lasx_xvreplve_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvreplve.b $xr0, $xr0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvreplve_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK-LABEL: lasx_xvreplve_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvreplve.h $xr0, $xr0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvreplve_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK-LABEL: lasx_xvreplve_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvreplve_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK-LABEL: lasx_xvreplve_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve0.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve0.ll +new file mode 100644 +index 000000000000..7996bb36ef03 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve0.ll +@@ -0,0 +1,62 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8>) ++ ++define <32 x i8> @lasx_xvreplve0_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvreplve0_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvreplve0.b $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> %va) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16>) ++ ++define <16 x i16> @lasx_xvreplve0_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvreplve0_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvreplve0.h $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> %va) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32>) ++ ++define <8 x i32> 
@lasx_xvreplve0_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvreplve0_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvreplve0.w $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64>) ++ ++define <4 x i64> @lasx_xvreplve0_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvreplve0_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvreplve0.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> %va) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8>) ++ ++define <32 x i8> @lasx_xvreplve0_q(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvreplve0_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvreplve0.q $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> %va) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr.ll +new file mode 100644 +index 000000000000..64d2773864e9 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvrotr_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvrotr_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrotr.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvrotr_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvrotr_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrotr.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvrotr_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvrotr_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrotr.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvrotr_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvrotr_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrotr.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvrotri_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvrotri_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrotri.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvrotri_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvrotri_h: ++; 
CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrotri.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvrotri_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvrotri_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrotri.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvrotri_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvrotri_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrotri.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sadd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sadd.ll +new file mode 100644 +index 000000000000..54a5e2e9c833 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sadd.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvsadd_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsadd_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsadd.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvsadd_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsadd_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsadd.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvsadd_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsadd_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsadd.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvsadd_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsadd_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsadd.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvsadd_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsadd_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsadd.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvsadd_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsadd_hu: ++; CHECK: # 
%bb.0: # %entry ++; CHECK-NEXT: xvsadd.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvsadd_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsadd_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsadd.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvsadd_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsadd_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsadd.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat.ll +new file mode 100644 +index 000000000000..293b9dc9eb4d +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsat_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvsat_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsat.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsat_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvsat_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsat.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsat_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvsat_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsat.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsat_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvsat_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsat.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsat_bu(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvsat_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsat.bu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsat_hu(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvsat_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsat.hu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x 
i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsat_wu(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvsat_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsat.wu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsat_du(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvsat_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsat.du $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq.ll +new file mode 100644 +index 000000000000..83bc93c88c73 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvseq_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvseq_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvseq.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvseq_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvseq_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvseq.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvseq_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvseq_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvseq.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvseq_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvseq_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvseq.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvseqi_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvseqi_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvseqi.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvseqi_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvseqi_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvseqi.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x 
i32>, i32) ++ ++define <8 x i32> @lasx_xvseqi_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvseqi_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvseqi.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvseqi_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvseqi_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvseqi.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-set.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-set.ll +new file mode 100644 +index 000000000000..6e3e2e0330f5 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-set.ll +@@ -0,0 +1,38 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare i32 @llvm.loongarch.lasx.xbz.v(<32 x i8>) ++ ++define i32 @lasx_xbz_v(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xbz_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvseteqz.v $fcc0, $xr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB0_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB0_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> %va) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8>) ++ ++define i32 @lasx_xbnz_v(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xbnz_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsetnez.v $fcc0, $xr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB1_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB1_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> %va) ++ ret i32 %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setallnez.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setallnez.ll +new file mode 100644 +index 000000000000..a466b78bf8d2 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setallnez.ll +@@ -0,0 +1,74 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8>) ++ ++define i32 @lasx_xbnz_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xbnz_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsetallnez.b $fcc0, $xr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB0_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB0_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> %va) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16>) ++ ++define i32 @lasx_xbnz_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xbnz_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsetallnez.h $fcc0, $xr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB1_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB1_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 
@llvm.loongarch.lasx.xbnz.h(<16 x i16> %va) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32>) ++ ++define i32 @lasx_xbnz_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xbnz_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsetallnez.w $fcc0, $xr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB2_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB2_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> %va) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64>) ++ ++define i32 @lasx_xbnz_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xbnz_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsetallnez.d $fcc0, $xr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB3_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB3_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> %va) ++ ret i32 %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setanyeqz.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setanyeqz.ll +new file mode 100644 +index 000000000000..36e65fc5b328 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setanyeqz.ll +@@ -0,0 +1,74 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare i32 @llvm.loongarch.lasx.xbz.b(<32 x i8>) ++ ++define i32 @lasx_xbz_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xbz_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsetanyeqz.b $fcc0, $xr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB0_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB0_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> %va) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lasx.xbz.h(<16 x i16>) ++ ++define i32 @lasx_xbz_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xbz_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsetanyeqz.h $fcc0, $xr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB1_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB1_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> %va) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lasx.xbz.w(<8 x i32>) ++ ++define i32 @lasx_xbz_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xbz_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsetanyeqz.w $fcc0, $xr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB2_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB2_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> %va) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lasx.xbz.d(<4 x i64>) ++ ++define i32 @lasx_xbz_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xbz_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsetanyeqz.d $fcc0, $xr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB3_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB3_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 
@llvm.loongarch.lasx.xbz.d(<4 x i64> %va) ++ ret i32 %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf.ll +new file mode 100644 +index 000000000000..9b9140f6ad62 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8>, <32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvshuf_b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { ++; CHECK-LABEL: lasx_xvshuf_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvshuf.b $xr0, $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16>, <16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvshuf_h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { ++; CHECK-LABEL: lasx_xvshuf_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvshuf.h $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32>, <8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvshuf_w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { ++; CHECK-LABEL: lasx_xvshuf_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvshuf.w $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64>, <4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvshuf_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { ++; CHECK-LABEL: lasx_xvshuf_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvshuf.d $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i.ll +new file mode 100644 +index 000000000000..31205086759c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvshuf4i_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvshuf4i_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvshuf4i.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvshuf4i_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvshuf4i_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvshuf4i.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvshuf4i_w(<8 x i32> %va) nounwind { 
++; CHECK-LABEL: lasx_xvshuf4i_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvshuf4i.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvshuf4i_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvshuf4i_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvshuf4i.d $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> %va, <4 x i64> %vb, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-signcov.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-signcov.ll +new file mode 100644 +index 000000000000..e6c6d8ccd0d3 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-signcov.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvsigncov_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsigncov_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsigncov.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvsigncov_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsigncov_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsigncov.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvsigncov_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsigncov_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsigncov.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvsigncov_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsigncov_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsigncov.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle.ll +new file mode 100644 +index 000000000000..8895efc84b84 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle.ll +@@ -0,0 +1,194 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvsle_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsle_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsle.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x 
i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvsle_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsle_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsle.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvsle_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsle_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsle.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvsle_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsle_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsle.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvslei_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvslei_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslei.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvslei_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvslei_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslei.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvslei_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvslei_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslei.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvslei_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvslei_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslei.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvsle_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsle_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsle.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvsle_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsle_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsle.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvsle_wu(<8 x i32> 
%va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsle_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsle.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvsle_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsle_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsle.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvslei_bu(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvslei_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslei.bu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvslei_hu(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvslei_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslei.hu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvslei_wu(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvslei_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslei.wu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvslei_du(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvslei_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslei.du $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll.ll +new file mode 100644 +index 000000000000..14110b613dbe +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvsll_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsll_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsll.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvsll_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsll_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsll.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvsll_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsll_w: ++; CHECK: # 
%bb.0: # %entry ++; CHECK-NEXT: xvsll.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvsll_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsll_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsll.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvslli_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvslli_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslli.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvslli_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvslli_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslli.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvslli_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvslli_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslli.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvslli_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvslli_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslli.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil.ll +new file mode 100644 +index 000000000000..a72b8a6cbb4f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil.ll +@@ -0,0 +1,74 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8>, i32) ++ ++define <16 x i16> @lasx_xvsllwil_h_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvsllwil_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsllwil.h.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16>, i32) ++ ++define <8 x i32> @lasx_xvsllwil_w_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvsllwil_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsllwil.w.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32>, i32) ++ ++define <4 x i64> @lasx_xvsllwil_d_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvsllwil_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsllwil.d.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> 
@llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> %va, i32 1) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8>, i32) ++ ++define <16 x i16> @lasx_xvsllwil_hu_bu(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvsllwil_hu_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsllwil.hu.bu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16>, i32) ++ ++define <8 x i32> @lasx_xvsllwil_wu_hu(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvsllwil_wu_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsllwil.wu.hu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32>, i32) ++ ++define <4 x i64> @lasx_xvsllwil_du_wu(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvsllwil_du_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsllwil.du.wu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt.ll +new file mode 100644 +index 000000000000..3ea87adff110 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt.ll +@@ -0,0 +1,194 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvslt_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvslt_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslt.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvslt_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvslt_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslt.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvslt_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvslt_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslt.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvslt_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvslt_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslt.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvslti_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvslti_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslti.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> 
@llvm.loongarch.lasx.xvslti.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvslti_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvslti_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslti.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvslti_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvslti_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslti.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvslti_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvslti_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslti.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvslt_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvslt_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslt.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvslt_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvslt_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslt.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvslt_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvslt_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslt.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvslt_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvslt_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslt.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvslti_bu(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvslti_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslti.bu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvslti_hu(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvslti_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslti.hu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> 
@llvm.loongarch.lasx.xvslti.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvslti_wu(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvslti_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslti.wu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvslti_du(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvslti_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslti.du $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra.ll +new file mode 100644 +index 000000000000..a7498682559b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvsra_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsra_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsra.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvsra_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsra_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsra.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvsra_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsra_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsra.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvsra_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsra_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsra.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrai_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvsrai_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrai.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrai_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvsrai_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrai.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrai_w(<8 x i32> 
%va) nounwind { ++; CHECK-LABEL: lasx_xvsrai_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrai.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrai_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvsrai_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrai.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sran.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sran.ll +new file mode 100644 +index 000000000000..f59ae4c19662 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sran.ll +@@ -0,0 +1,38 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16>, <16 x i16>) ++ ++define <32 x i8> @lasx_xvsran_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsran_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsran.b.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32>, <8 x i32>) ++ ++define <16 x i16> @lasx_xvsran_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsran_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsran.h.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64>, <4 x i64>) ++ ++define <8 x i32> @lasx_xvsran_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsran_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsran.w.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <8 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani.ll +new file mode 100644 +index 000000000000..91fb90da9c52 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrani_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrani_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrani.b.h $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrani_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrani_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrani.h.w $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> 
@llvm.loongarch.lasx.xvsrani.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrani_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrani_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrani.w.d $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrani_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrani_d_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrani.d.q $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar.ll +new file mode 100644 +index 000000000000..e2c160557c4d +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvsrar_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrar_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrar.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvsrar_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrar_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrar.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvsrar_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrar_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrar.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvsrar_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrar_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrar.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrari_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvsrari_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrari.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrari_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvsrari_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrari.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> %va, i32 1) ++ ret <16 
x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrari_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvsrari_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrari.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrari_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvsrari_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrari.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarn.ll +new file mode 100644 +index 000000000000..02dd989773ca +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarn.ll +@@ -0,0 +1,38 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16>, <16 x i16>) ++ ++define <32 x i8> @lasx_xvsrarn_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrarn_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrarn.b.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32>, <8 x i32>) ++ ++define <16 x i16> @lasx_xvsrarn_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrarn_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrarn.h.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64>, <4 x i64>) ++ ++define <8 x i32> @lasx_xvsrarn_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrarn_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrarn.w.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <8 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni.ll +new file mode 100644 +index 000000000000..a7d2c3739793 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrarni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrarni_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrarni.b.h $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrarni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrarni_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrarni.h.w $xr0, $xr1, 1 ++; CHECK-NEXT: 
ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrarni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrarni_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrarni.w.d $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrarni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrarni_d_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrarni.d.q $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl.ll +new file mode 100644 +index 000000000000..7b2992f2ca3b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvsrl_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrl_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrl.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvsrl_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrl_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrl.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvsrl_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrl_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrl.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvsrl_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrl_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrl.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrli_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvsrli_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrli.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrli_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvsrli_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: 
xvsrli.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrli_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvsrli_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrli.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrli_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvsrli_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrli.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srln.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srln.ll +new file mode 100644 +index 000000000000..dc5c0e016ea0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srln.ll +@@ -0,0 +1,38 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16>, <16 x i16>) ++ ++define <32 x i8> @lasx_xvsrln_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrln_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrln.b.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32>, <8 x i32>) ++ ++define <16 x i16> @lasx_xvsrln_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrln_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrln.h.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64>, <4 x i64>) ++ ++define <8 x i32> @lasx_xvsrln_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrln_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrln.w.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <8 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni.ll +new file mode 100644 +index 000000000000..0301ebb195e2 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrlni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrlni_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlni.b.h $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrlni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: 
lasx_xvsrlni_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlni.h.w $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrlni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrlni_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlni.w.d $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrlni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrlni_d_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlni.d.q $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr.ll +new file mode 100644 +index 000000000000..e04504158e27 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvsrlr_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrlr_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlr.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvsrlr_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrlr_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlr.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvsrlr_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrlr_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlr.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvsrlr_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrlr_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlr.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrlri_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvsrlri_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlri.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16>, i32) ++ ++define <16 x i16> 
@lasx_xvsrlri_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvsrlri_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlri.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrlri_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvsrlri_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlri.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrlri_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvsrlri_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlri.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrn.ll +new file mode 100644 +index 000000000000..1e7df379c6e1 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrn.ll +@@ -0,0 +1,38 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16>, <16 x i16>) ++ ++define <32 x i8> @lasx_xvsrlrn_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrlrn_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlrn.b.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32>, <8 x i32>) ++ ++define <16 x i16> @lasx_xvsrlrn_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrlrn_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlrn.h.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64>, <4 x i64>) ++ ++define <8 x i32> @lasx_xvsrlrn_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrlrn_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlrn.w.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <8 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni.ll +new file mode 100644 +index 000000000000..56dbafe8b1ac +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrlrni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrlrni_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlrni.b.h $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> 
@llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrlrni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrlrni_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlrni.h.w $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrlrni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrlrni_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlrni.w.d $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrlrni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrlrni_d_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlrni.d.q $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssran.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssran.ll +new file mode 100644 +index 000000000000..da1857dad145 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssran.ll +@@ -0,0 +1,74 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16>, <16 x i16>) ++ ++define <32 x i8> @lasx_xvssran_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssran_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssran.b.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32>, <8 x i32>) ++ ++define <16 x i16> @lasx_xvssran_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssran_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssran.h.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64>, <4 x i64>) ++ ++define <8 x i32> @lasx_xvssran_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssran_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssran.w.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16>, <16 x i16>) ++ ++define <32 x i8> @lasx_xvssran_bu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssran_bu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssran.bu.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32>, <8 x i32>) ++ ++define <16 x i16> @lasx_xvssran_hu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssran_hu_w: ++; CHECK: # 
%bb.0: # %entry ++; CHECK-NEXT: xvssran.hu.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64>, <4 x i64>) ++ ++define <8 x i32> @lasx_xvssran_wu_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssran_wu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssran.wu.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <8 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani.ll +new file mode 100644 +index 000000000000..9efa659b4a1e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrani_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrani_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrani.b.h $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrani_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrani_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrani.h.w $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrani_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrani_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrani.w.d $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrani_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrani_d_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrani.d.q $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrani_bu_h(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrani_bu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrani.bu.h $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrani_hu_w(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrani_hu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrani.hu.w $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> 
@llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrani_wu_d(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrani_wu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrani.wu.d $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrani_du_q(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrani_du_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrani.du.q $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> %va, <4 x i64> %vb, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarn.ll +new file mode 100644 +index 000000000000..b5d59ff06f4d +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarn.ll +@@ -0,0 +1,74 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16>, <16 x i16>) ++ ++define <32 x i8> @lasx_xvssrarn_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrarn_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrarn.b.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32>, <8 x i32>) ++ ++define <16 x i16> @lasx_xvssrarn_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrarn_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrarn.h.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64>, <4 x i64>) ++ ++define <8 x i32> @lasx_xvssrarn_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrarn_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrarn.w.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16>, <16 x i16>) ++ ++define <32 x i8> @lasx_xvssrarn_bu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrarn_bu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrarn.bu.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32>, <8 x i32>) ++ ++define <16 x i16> @lasx_xvssrarn_hu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrarn_hu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrarn.hu.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x 
i64>, <4 x i64>) ++ ++define <8 x i32> @lasx_xvssrarn_wu_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrarn_wu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrarn.wu.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <8 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni.ll +new file mode 100644 +index 000000000000..da411dad645b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrarni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrarni_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrarni.b.h $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrarni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrarni_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrarni.h.w $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrarni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrarni_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrarni.w.d $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrarni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrarni_d_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrarni.d.q $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrarni_bu_h(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrarni_bu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrarni.bu.h $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrarni_hu_w(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrarni_hu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrarni.hu.w $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrarni_wu_d(<8 x i32> %va, <8 x i32> %vb) 
nounwind { ++; CHECK-LABEL: lasx_xvssrarni_wu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrarni.wu.d $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrarni_du_q(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrarni_du_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrarni.du.q $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrln.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrln.ll +new file mode 100644 +index 000000000000..c60b5bdf81a0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrln.ll +@@ -0,0 +1,74 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16>, <16 x i16>) ++ ++define <32 x i8> @lasx_xvssrln_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrln_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrln.b.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32>, <8 x i32>) ++ ++define <16 x i16> @lasx_xvssrln_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrln_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrln.h.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64>, <4 x i64>) ++ ++define <8 x i32> @lasx_xvssrln_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrln_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrln.w.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16>, <16 x i16>) ++ ++define <32 x i8> @lasx_xvssrln_bu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrln_bu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrln.bu.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32>, <8 x i32>) ++ ++define <16 x i16> @lasx_xvssrln_hu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrln_hu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrln.hu.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64>, <4 x i64>) ++ ++define <8 x i32> @lasx_xvssrln_wu_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrln_wu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrln.wu.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> 
@llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <8 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni.ll +new file mode 100644 +index 000000000000..e57dd426bde8 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrlni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlni_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlni.b.h $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrlni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlni_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlni.h.w $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrlni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlni_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlni.w.d $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrlni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlni_d_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlni.d.q $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrlni_bu_h(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlni_bu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlni.bu.h $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrlni_hu_w(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlni_hu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlni.hu.w $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrlni_wu_d(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlni_wu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlni.wu.d $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> 
@llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrlni_du_q(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlni_du_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlni.du.q $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrn.ll +new file mode 100644 +index 000000000000..774cf1bd5e84 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrn.ll +@@ -0,0 +1,74 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16>, <16 x i16>) ++ ++define <32 x i8> @lasx_xvssrlrn_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlrn_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlrn.b.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32>, <8 x i32>) ++ ++define <16 x i16> @lasx_xvssrlrn_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlrn_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlrn.h.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64>, <4 x i64>) ++ ++define <8 x i32> @lasx_xvssrlrn_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlrn_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlrn.w.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16>, <16 x i16>) ++ ++define <32 x i8> @lasx_xvssrlrn_bu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlrn_bu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlrn.bu.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32>, <8 x i32>) ++ ++define <16 x i16> @lasx_xvssrlrn_hu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlrn_hu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlrn.hu.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64>, <4 x i64>) ++ ++define <8 x i32> @lasx_xvssrlrn_wu_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlrn_wu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlrn.wu.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <8 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni.ll +new file mode 100644 +index 000000000000..9a80516d8d78 +--- /dev/null 
++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrlrni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlrni_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlrni.b.h $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrlrni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlrni_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlrni.h.w $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrlrni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlrni_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlrni.w.d $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrlrni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlrni_d_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlrni.d.q $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrlrni_bu_h(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlrni_bu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlrni.bu.h $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrlrni_hu_w(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlrni_hu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlrni.hu.w $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrlrni_wu_d(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlrni_wu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlrni.wu.d $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrlrni_du_q(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlrni_du_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlrni.du.q $xr0, $xr1, 1 
++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssub.ll +new file mode 100644 +index 000000000000..cd3ccd9f5262 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssub.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvssub_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssub_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssub.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvssub_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssub_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssub.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvssub_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssub_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssub.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvssub_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssub_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssub.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvssub_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssub_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssub.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvssub_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssub_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssub.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvssub_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssub_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssub.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvssub_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: 
lasx_xvssub_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssub.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st.ll +new file mode 100644 +index 000000000000..b69e7b813f0c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st.ll +@@ -0,0 +1,27 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare void @llvm.loongarch.lasx.xvst(<32 x i8>, i8*, i32) ++ ++define void @lasx_xvst(<32 x i8> %va, i8* %p) nounwind { ++; CHECK-LABEL: lasx_xvst: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvst $xr0, $a0, 1 ++; CHECK-NEXT: ret ++entry: ++ call void @llvm.loongarch.lasx.xvst(<32 x i8> %va, i8* %p, i32 1) ++ ret void ++} ++ ++declare void @llvm.loongarch.lasx.xvstx(<32 x i8>, i8*, i64) ++ ++define void @lasx_xvstx(<32 x i8> %va, i8* %p) nounwind { ++; CHECK-LABEL: lasx_xvstx: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: ori $a1, $zero, 1 ++; CHECK-NEXT: xvstx $xr0, $a0, $a1 ++; CHECK-NEXT: ret ++entry: ++ call void @llvm.loongarch.lasx.xvstx(<32 x i8> %va, i8* %p, i64 1) ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm.ll +new file mode 100644 +index 000000000000..52ef3c471412 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare void @llvm.loongarch.lasx.xvstelm.b(<32 x i8>, i8*, i32, i32) ++ ++define void @lasx_xvstelm_b(<32 x i8> %va, i8* %p) nounwind { ++; CHECK-LABEL: lasx_xvstelm_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvstelm.b $xr0, $a0, 1, 1 ++; CHECK-NEXT: ret ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 1, i32 1) ++ ret void ++} ++ ++declare void @llvm.loongarch.lasx.xvstelm.h(<16 x i16>, i8*, i32, i32) ++ ++define void @lasx_xvstelm_h(<16 x i16> %va, i8* %p) nounwind { ++; CHECK-LABEL: lasx_xvstelm_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvstelm.h $xr0, $a0, 2, 1 ++; CHECK-NEXT: ret ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 2, i32 1) ++ ret void ++} ++ ++declare void @llvm.loongarch.lasx.xvstelm.w(<8 x i32>, i8*, i32, i32) ++ ++define void @lasx_xvstelm_w(<8 x i32> %va, i8* %p) nounwind { ++; CHECK-LABEL: lasx_xvstelm_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvstelm.w $xr0, $a0, 4, 1 ++; CHECK-NEXT: ret ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 4, i32 1) ++ ret void ++} ++ ++declare void @llvm.loongarch.lasx.xvstelm.d(<4 x i64>, i8*, i32, i32) ++ ++define void @lasx_xvstelm_d(<4 x i64> %va, i8* %p) nounwind { ++; CHECK-LABEL: lasx_xvstelm_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvstelm.d $xr0, $a0, 8, 1 ++; CHECK-NEXT: ret ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 8, i32 1) ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sub.ll +new file mode 100644 +index 000000000000..4d69dd83dcde +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sub.ll +@@ -0,0 +1,62 @@ ++; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvsub_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsub_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsub.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvsub_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsub_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsub.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvsub_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsub_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsub.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvsub_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsub_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsub.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvsub_q(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsub_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsub.q $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi.ll +new file mode 100644 +index 000000000000..cc3235ff4657 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsubi_bu(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvsubi_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubi.bu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsubi_hu(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvsubi_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubi.hu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsubi_wu(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvsubi_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubi.wu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> 
@llvm.loongarch.lasx.xvsubi.wu(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsubi_du(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvsubi_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubi.du $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subw.ll +new file mode 100644 +index 000000000000..6f203e894990 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subw.ll +@@ -0,0 +1,194 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvsubwev_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsubwev_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwev.h.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvsubwev_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsubwev_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwev.w.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvsubwev_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsubwev_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwev.d.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvsubwev_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsubwev_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwev.q.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvsubwev_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsubwev_h_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwev.h.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvsubwev_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsubwev_w_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwev.w.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvsubwev_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { 
++; CHECK-LABEL: lasx_xvsubwev_d_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwev.d.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvsubwev_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsubwev_q_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwev.q.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvsubwod_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsubwod_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwod.h.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvsubwod_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsubwod_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwod.w.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvsubwod_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsubwod_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwod.d.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvsubwod_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsubwod_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwod.q.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvsubwod_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsubwod_h_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwod.h.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvsubwod_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsubwod_w_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwod.w.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvsubwod_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsubwod_d_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwod.d.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> 
@llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvsubwod_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsubwod_q_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwod.q.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xor.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xor.ll +new file mode 100644 +index 000000000000..6395b3d6f2e7 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xor.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvxor_v(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvxor_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori.ll +new file mode 100644 +index 000000000000..c71d7e731165 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvxori_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvxori_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvxori.b $xr0, $xr0, 3 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> %va, i32 3) ++ ret <32 x i8> %res ++} +-- +2.20.1 + diff --git a/0005-LoongArch-Improve-codegen-for-atomic-cmpxchg-ops-693.patch b/0005-LoongArch-Improve-codegen-for-atomic-cmpxchg-ops-693.patch new file mode 100644 index 0000000..2c52921 --- /dev/null +++ b/0005-LoongArch-Improve-codegen-for-atomic-cmpxchg-ops-693.patch @@ -0,0 +1,204 @@ +From 0604fa00628899bf420fe2d0621b3fffbe52547c Mon Sep 17 00:00:00 2001 +From: Lu Weining <90239436+SixWeining@users.noreply.github.com> +Date: Thu, 19 Oct 2023 09:21:51 +0800 +Subject: [PATCH 5/7] [LoongArch] Improve codegen for atomic cmpxchg ops + (#69339) + +PR #67391 improved atomic codegen by handling memory ordering specified +by the `cmpxchg` instruction. An acquire barrier needs to be generated +when memory ordering includes an acquire operation. This PR improves the +codegen further by only handling the failure ordering. 
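+
+For illustration only (not part of the upstream commit), here is a minimal LLVM IR
+sketch of the case this change targets; the function name and types are made up,
+but the orderings match the updated tests below: for a `cmpxchg` whose success
+ordering is acquire and whose failure ordering is monotonic, only the failure
+ordering is now consulted, so the LL-SC failure path gets the weaker `dbar 1792`
+hint instead of `dbar 20`.
+
+; Hypothetical example, not taken from this patch.
+define i32 @cmpxchg_acquire_monotonic_sketch(ptr %p, i32 %cmp, i32 %new) nounwind {
+entry:
+  ; Success ordering: acquire; failure ordering: monotonic.
+  %pair = cmpxchg ptr %p, i32 %cmp, i32 %new acquire monotonic
+  ; Extract the loaded value from the {value, success-flag} pair.
+  %old = extractvalue { i32, i1 } %pair, 0
+  ret i32 %old
+}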
+ +(cherry picked from commit 78abc45c44cdadf76b30e1f3dc24936bb5627d68) + +--- + .../LoongArchExpandAtomicPseudoInsts.cpp | 4 +- + .../LoongArch/LoongArchISelLowering.cpp | 7 ++- + .../Target/LoongArch/LoongArchInstrInfo.td | 55 ++++++++++++++++--- + .../ir-instruction/atomic-cmpxchg.ll | 8 +-- + 4 files changed, 56 insertions(+), 18 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp +index b348cb56c136..18a532b55ee5 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp +@@ -571,11 +571,11 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg( + BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB); + } + +- AtomicOrdering Ordering = ++ AtomicOrdering FailureOrdering = + static_cast(MI.getOperand(IsMasked ? 6 : 5).getImm()); + int hint; + +- switch (Ordering) { ++ switch (FailureOrdering) { + case AtomicOrdering::Acquire: + case AtomicOrdering::AcquireRelease: + case AtomicOrdering::SequentiallyConsistent: +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index 33a3197013cc..99328f09921f 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -4492,8 +4492,9 @@ LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR( + Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( + IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, + Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { +- Value *Ordering = +- Builder.getIntN(Subtarget.getGRLen(), static_cast(Ord)); ++ AtomicOrdering FailOrd = CI->getFailureOrdering(); ++ Value *FailureOrdering = ++ Builder.getIntN(Subtarget.getGRLen(), static_cast(FailOrd)); + + // TODO: Support cmpxchg on LA32. 
+ Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64; +@@ -4504,7 +4505,7 @@ Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( + Function *MaskedCmpXchg = + Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys); + Value *Result = Builder.CreateCall( +- MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering}); ++ MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering}); + Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); + return Result; + } +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +index fcbd314507a5..ab1890556814 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +@@ -1753,7 +1753,7 @@ def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMMinMax; + + class PseudoCmpXchg + : Pseudo<(outs GPR:$res, GPR:$scratch), +- (ins GPR:$addr, GPR:$cmpval, GPR:$newval, grlenimm:$ordering)> { ++ (ins GPR:$addr, GPR:$cmpval, GPR:$newval, grlenimm:$fail_order)> { + let Constraints = "@earlyclobber $res,@earlyclobber $scratch"; + let mayLoad = 1; + let mayStore = 1; +@@ -1767,7 +1767,7 @@ def PseudoCmpXchg64 : PseudoCmpXchg; + def PseudoMaskedCmpXchg32 + : Pseudo<(outs GPR:$res, GPR:$scratch), + (ins GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, +- grlenimm:$ordering)> { ++ grlenimm:$fail_order)> { + let Constraints = "@earlyclobber $res,@earlyclobber $scratch"; + let mayLoad = 1; + let mayStore = 1; +@@ -1785,6 +1785,43 @@ class AtomicPat + : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering), + (AMInst GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering)>; + ++// These atomic cmpxchg PatFrags only care about the failure ordering. ++// The PatFrags defined by multiclass `ternary_atomic_op_ord` in ++// TargetSelectionDAG.td care about the merged memory ordering that is the ++// stronger one between success and failure. But for LoongArch LL-SC we only ++// need to care about the failure ordering as explained in PR #67391. So we ++// define these PatFrags that will be used to define cmpxchg pats below. 
++multiclass ternary_atomic_op_failure_ord { ++ def NAME#_failure_monotonic : PatFrag<(ops node:$ptr, node:$cmp, node:$val), ++ (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ ++ AtomicOrdering Ordering = cast(N)->getFailureOrdering(); ++ return Ordering == AtomicOrdering::Monotonic; ++ }]>; ++ def NAME#_failure_acquire : PatFrag<(ops node:$ptr, node:$cmp, node:$val), ++ (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ ++ AtomicOrdering Ordering = cast(N)->getFailureOrdering(); ++ return Ordering == AtomicOrdering::Acquire; ++ }]>; ++ def NAME#_failure_release : PatFrag<(ops node:$ptr, node:$cmp, node:$val), ++ (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ ++ AtomicOrdering Ordering = cast(N)->getFailureOrdering(); ++ return Ordering == AtomicOrdering::Release; ++ }]>; ++ def NAME#_failure_acq_rel : PatFrag<(ops node:$ptr, node:$cmp, node:$val), ++ (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ ++ AtomicOrdering Ordering = cast(N)->getFailureOrdering(); ++ return Ordering == AtomicOrdering::AcquireRelease; ++ }]>; ++ def NAME#_failure_seq_cst : PatFrag<(ops node:$ptr, node:$cmp, node:$val), ++ (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ ++ AtomicOrdering Ordering = cast(N)->getFailureOrdering(); ++ return Ordering == AtomicOrdering::SequentiallyConsistent; ++ }]>; ++} ++ ++defm atomic_cmp_swap_32 : ternary_atomic_op_failure_ord; ++defm atomic_cmp_swap_64 : ternary_atomic_op_failure_ord; ++ + let Predicates = [IsLA64] in { + def : AtomicPat; +@@ -1847,24 +1884,24 @@ def : AtomicPat { +- def : Pat<(vt (!cast(Op#"_monotonic") GPR:$addr, GPR:$cmp, GPR:$new)), ++ def : Pat<(vt (!cast(Op#"_failure_monotonic") GPR:$addr, GPR:$cmp, GPR:$new)), + (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 2)>; +- def : Pat<(vt (!cast(Op#"_acquire") GPR:$addr, GPR:$cmp, GPR:$new)), ++ def : Pat<(vt (!cast(Op#"_failure_acquire") GPR:$addr, GPR:$cmp, GPR:$new)), + (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 4)>; +- def : Pat<(vt (!cast(Op#"_release") GPR:$addr, GPR:$cmp, GPR:$new)), ++ def : Pat<(vt (!cast(Op#"_failure_release") GPR:$addr, GPR:$cmp, GPR:$new)), + (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 5)>; +- def : Pat<(vt (!cast(Op#"_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new)), ++ def : Pat<(vt (!cast(Op#"_failure_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new)), + (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 6)>; +- def : Pat<(vt (!cast(Op#"_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new)), ++ def : Pat<(vt (!cast(Op#"_failure_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new)), + (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>; + } + + defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32>; + defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64, i64>; + def : Pat<(int_loongarch_masked_cmpxchg_i64 +- GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering), ++ GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$fail_order), + (PseudoMaskedCmpXchg32 +- GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>; ++ GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$fail_order)>; + + def : PseudoMaskedAMMinMaxPat; +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +index 174bb9d0ff7d..1dd3f39852d8 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +@@ -132,7 +132,7 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { + ; LA64-NEXT: beqz $a5, .LBB4_1 + ; LA64-NEXT: b .LBB4_4 + ; 
LA64-NEXT: .LBB4_3: +-; LA64-NEXT: dbar 20 ++; LA64-NEXT: dbar 1792 + ; LA64-NEXT: .LBB4_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire monotonic +@@ -166,7 +166,7 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin + ; LA64-NEXT: beqz $a5, .LBB5_1 + ; LA64-NEXT: b .LBB5_4 + ; LA64-NEXT: .LBB5_3: +-; LA64-NEXT: dbar 20 ++; LA64-NEXT: dbar 1792 + ; LA64-NEXT: .LBB5_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire monotonic +@@ -185,7 +185,7 @@ define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin + ; LA64-NEXT: beqz $a4, .LBB6_1 + ; LA64-NEXT: b .LBB6_4 + ; LA64-NEXT: .LBB6_3: +-; LA64-NEXT: dbar 20 ++; LA64-NEXT: dbar 1792 + ; LA64-NEXT: .LBB6_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire monotonic +@@ -204,7 +204,7 @@ define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin + ; LA64-NEXT: beqz $a4, .LBB7_1 + ; LA64-NEXT: b .LBB7_4 + ; LA64-NEXT: .LBB7_3: +-; LA64-NEXT: dbar 20 ++; LA64-NEXT: dbar 1792 + ; LA64-NEXT: .LBB7_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire monotonic +-- +2.20.1 + diff --git a/0004-Backport-MC-LoongArch-Add-AlignFragment-size-if-layout-is-available-and-not-need-insert-nops.patch b/0005-MC-LoongArch-Add-AlignFragment-size-if-layout-is-ava.patch similarity index 98% rename from 0004-Backport-MC-LoongArch-Add-AlignFragment-size-if-layout-is-available-and-not-need-insert-nops.patch rename to 0005-MC-LoongArch-Add-AlignFragment-size-if-layout-is-ava.patch index 72b7924..720149b 100644 --- a/0004-Backport-MC-LoongArch-Add-AlignFragment-size-if-layout-is-available-and-not-need-insert-nops.patch +++ b/0005-MC-LoongArch-Add-AlignFragment-size-if-layout-is-ava.patch @@ -12,7 +12,6 @@ AlignFragment should be a constant. So we can add it to Displacement for folding A-B. (cherry picked from commit 0731567a31e4ade97c27801045156a88c4589704) -Change-Id: I554d6766bd7f688204e956e4a6431574b4c511c9 --- llvm/lib/MC/MCExpr.cpp | 6 +++++ llvm/test/MC/LoongArch/Misc/cfi-advance.s | 27 +++++++++++++++++++ diff --git a/0005-Revert-lld-test-LoongArch-Remove-the-test-for-R_LARC.patch b/0005-Revert-lld-test-LoongArch-Remove-the-test-for-R_LARC.patch new file mode 100644 index 0000000..a81c583 --- /dev/null +++ b/0005-Revert-lld-test-LoongArch-Remove-the-test-for-R_LARC.patch @@ -0,0 +1,47 @@ +From 7ae21a35eb24c423df5e62328da3d7a2dea847a0 Mon Sep 17 00:00:00 2001 +From: Weining Lu +Date: Mon, 25 Dec 2023 22:41:09 +0800 +Subject: [PATCH 05/23] Revert "[lld][test][LoongArch] Remove the test for + R_LARCH_CALL36 range checking" + +This reverts commit 0fbc728dba97149e530cfb7f2ada0283c398a7ce. + +In 88548df0fc08, both the .sec.foo and .tex sections used the same +section flags, hence sharing one segment, pushing the output file +size too large. This breaks on many buildbots. + +Now assign section .sec.foo different flags ("awx") from .text ("ax") +so that both sections get their own segment. 
+ +(cherry picked from commit 6452395561eaae59e38f1df84f5413dffdb9169f) +--- + lld/test/ELF/loongarch-call36.s | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/lld/test/ELF/loongarch-call36.s b/lld/test/ELF/loongarch-call36.s +index 0a00adacbd6a..b593fdf1f604 100644 +--- a/lld/test/ELF/loongarch-call36.s ++++ b/lld/test/ELF/loongarch-call36.s +@@ -40,6 +40,10 @@ + # GOTPLT-NEXT: 0x01274730 00000000 00000000 00000000 00000000 + # GOTPLT-NEXT: 0x01274740 00452301 00000000 + ++# RUN: not ld.lld %t/a.o --section-start=.text=0x20000 --section-start=.sec.foo=0x2000020000 -o /dev/null 2>&1 | \ ++# RUN: FileCheck -DFILE=%t/a.o --check-prefix=ERROR-RANGE %s ++# ERROR-RANGE: error: [[FILE]]:(.text+0x0): relocation R_LARCH_CALL36 out of range: 137438953472 is not in [-137439084544, 137438822399]; references 'foo' ++ + ## Impossible case in reality becasue all LoongArch instructions are fixed 4-bytes long. + # RUN: not ld.lld %t/a.o --section-start=.text=0x20000 --section-start=.sec.foo=0x40001 -o /dev/null 2>&1 | \ + # RUN: FileCheck -DFILE=%t/a.o --check-prefix=ERROR-ALIGN %s +@@ -59,7 +63,7 @@ _start: + pcaddu18i $t0, 0 + jirl $zero, $t0, 0 + +-.section .sec.foo,"ax" ++.section .sec.foo,"awx" + .global foo + foo: + ret +-- +2.20.1 + diff --git a/0005-test-compiler-rt-Mark-several-tests-as-UNSUPPORTED-o.patch b/0005-test-compiler-rt-Mark-several-tests-as-UNSUPPORTED-o.patch new file mode 100644 index 0000000..2ed523a --- /dev/null +++ b/0005-test-compiler-rt-Mark-several-tests-as-UNSUPPORTED-o.patch @@ -0,0 +1,69 @@ +From b26071a4f4be011b6b3707025e9a5147b33683bf Mon Sep 17 00:00:00 2001 +From: Ami-zhang +Date: Fri, 27 Oct 2023 16:52:10 +0800 +Subject: [PATCH 05/27] [test][compiler-rt] Mark several tests as UNSUPPORTED + on LoongArch (#69699) + +(cherry picked from commit 75b0a99668cef7abaf36e09c41bb1eb91234bbf3) +--- + compiler-rt/test/fuzzer/exit_on_src_pos.test | 2 ++ + .../test/sanitizer_common/TestCases/Linux/odd_stack_size.cpp | 2 ++ + .../sanitizer_common/TestCases/Linux/release_to_os_test.cpp | 2 +- + compiler-rt/test/xray/TestCases/Posix/fdr-mode.cpp | 2 ++ + 4 files changed, 7 insertions(+), 1 deletion(-) + +diff --git a/compiler-rt/test/fuzzer/exit_on_src_pos.test b/compiler-rt/test/fuzzer/exit_on_src_pos.test +index 541e0c4c6e42..020424e2d9fd 100644 +--- a/compiler-rt/test/fuzzer/exit_on_src_pos.test ++++ b/compiler-rt/test/fuzzer/exit_on_src_pos.test +@@ -6,6 +6,8 @@ + + # Test does not complete on Armv7 Thumb build bot + UNSUPPORTED: target=thumb{{.*}} ++# Timeout on loongarch64 machine ++UNSUPPORTED: target=loongarch64{{.*}} + + RUN: %cpp_compiler -O0 %S/SimpleTest.cpp -o %t-SimpleTest.exe -mllvm -use-unknown-locations=Disable + RUN: %cpp_compiler -O0 %S/ShrinkControlFlowTest.cpp -o %t-ShrinkControlFlowTest.exe +diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/odd_stack_size.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/odd_stack_size.cpp +index fc31212b7f18..9d7d46b462a8 100644 +--- a/compiler-rt/test/sanitizer_common/TestCases/Linux/odd_stack_size.cpp ++++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/odd_stack_size.cpp +@@ -5,6 +5,8 @@ + // AddressSanitizer: CHECK failed: asan_thread.cpp:315 "((AddrIsInStack((uptr)&local))) != (0)" + // https://lab.llvm.org/buildbot/#/builders/18/builds/8162 + // UNSUPPORTED: target=powerpc64{{.*}} ++/// Occasionally fail on loongarch64 machine ++// UNSUPPORTED: target=loongarch64{{.*}} + + #include + #include +diff --git 
a/compiler-rt/test/sanitizer_common/TestCases/Linux/release_to_os_test.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/release_to_os_test.cpp +index 67351a916441..0fa77200bf1c 100644 +--- a/compiler-rt/test/sanitizer_common/TestCases/Linux/release_to_os_test.cpp ++++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/release_to_os_test.cpp +@@ -3,7 +3,7 @@ + + // Temporarily disable test + // UNSUPPORTED: tsan +-// UNSUPPORTED: target=powerpc64{{.*}} ++// UNSUPPORTED: target={{(powerpc64|loongarch64).*}} + + // Not needed, no allocator. + // UNSUPPORTED: ubsan +diff --git a/compiler-rt/test/xray/TestCases/Posix/fdr-mode.cpp b/compiler-rt/test/xray/TestCases/Posix/fdr-mode.cpp +index 3065981a2c9a..0ee8aaa755d5 100644 +--- a/compiler-rt/test/xray/TestCases/Posix/fdr-mode.cpp ++++ b/compiler-rt/test/xray/TestCases/Posix/fdr-mode.cpp +@@ -19,6 +19,8 @@ + // RUN: rm fdr-logging-test-* + // RUN: rm fdr-unwrite-test-* + // UNSUPPORTED: target=powerpc64le-{{.*}} ++/// TODO: FDR logging arg1 handler(__xray_ArgLoggerEntry) hasn't implemented yet on LoongArch ++// UNSUPPORTED: target=loongarch64{{.*}} + // REQUIRES: built-in-llvm-tree + + #include "xray/xray_log_interface.h" +-- +2.20.1 + diff --git a/0006-LoongArch-Add-testcases-of-LASX-intrinsics-with-imme.patch b/0006-LoongArch-Add-testcases-of-LASX-intrinsics-with-imme.patch new file mode 100644 index 0000000..d510379 --- /dev/null +++ b/0006-LoongArch-Add-testcases-of-LASX-intrinsics-with-imme.patch @@ -0,0 +1,5780 @@ +From a8d0c5a4d4b3f713fb817fd97b69b58fe9dbafd3 Mon Sep 17 00:00:00 2001 +From: chenli +Date: Sat, 19 Aug 2023 17:14:12 +0800 +Subject: [PATCH 06/42] [LoongArch] Add testcases of LASX intrinsics with + immediates + +The testcases mainly cover three situations: +- the arguments which should be immediates are non immediates. +- the immediate is out of upper limit of the argument type. +- the immediate is out of lower limit of the argument type. 
+ +Depends on D155830 + +Reviewed By: SixWeining + +Differential Revision: https://reviews.llvm.org/D157571 + +(cherry picked from commit 82bbf7003cabe2b6be8ab9b88bc96ecb8a64dc49) + +--- + .../lasx/intrinsic-addi-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lasx/intrinsic-addi-non-imm.ll | 37 +++++ + .../lasx/intrinsic-andi-invalid-imm.ll | 17 +++ + .../LoongArch/lasx/intrinsic-andi-non-imm.ll | 10 ++ + .../lasx/intrinsic-bitclr-invalid-imm.ll | 65 +++++++++ + .../lasx/intrinsic-bitclr-non-imm.ll | 37 +++++ + .../lasx/intrinsic-bitrev-invalid-imm.ll | 65 +++++++++ + .../lasx/intrinsic-bitrev-non-imm.ll | 37 +++++ + .../lasx/intrinsic-bitseli-invalid-imm.ll | 17 +++ + .../lasx/intrinsic-bitseli-non-imm.ll | 10 ++ + .../lasx/intrinsic-bitset-invalid-imm.ll | 65 +++++++++ + .../lasx/intrinsic-bitset-non-imm.ll | 37 +++++ + .../lasx/intrinsic-bsll-invalid-imm.ll | 17 +++ + .../LoongArch/lasx/intrinsic-bsll-non-imm.ll | 10 ++ + .../lasx/intrinsic-bsrl-invalid-imm.ll | 17 +++ + .../LoongArch/lasx/intrinsic-bsrl-non-imm.ll | 10 ++ + .../lasx/intrinsic-extrins-invalid-imm.ll | 65 +++++++++ + .../lasx/intrinsic-extrins-non-imm.ll | 37 +++++ + .../lasx/intrinsic-frstp-invalid-imm.ll | 33 +++++ + .../LoongArch/lasx/intrinsic-frstp-non-imm.ll | 19 +++ + .../lasx/intrinsic-insgr2vr-invalid-imm.ll | 33 +++++ + .../lasx/intrinsic-insgr2vr-non-imm.ll | 19 +++ + .../lasx/intrinsic-insve0-invalid-imm.ll | 33 +++++ + .../lasx/intrinsic-insve0-non-imm.ll | 19 +++ + .../lasx/intrinsic-ld-invalid-imm.ll | 17 +++ + .../LoongArch/lasx/intrinsic-ld-non-imm.ll | 10 ++ + .../lasx/intrinsic-ldi-invalid-imm.ll | 81 +++++++++++ + .../LoongArch/lasx/intrinsic-ldi-non-imm.ll | 46 +++++++ + .../lasx/intrinsic-ldrepl-invalid-imm.ll | 65 +++++++++ + .../lasx/intrinsic-ldrepl-non-imm.ll | 37 +++++ + .../lasx/intrinsic-max-invalid-imm.ll | 129 ++++++++++++++++++ + .../LoongArch/lasx/intrinsic-max-non-imm.ll | 73 ++++++++++ + .../lasx/intrinsic-min-invalid-imm.ll | 129 ++++++++++++++++++ + .../LoongArch/lasx/intrinsic-min-non-imm.ll | 73 ++++++++++ + .../lasx/intrinsic-nori-invalid-imm.ll | 17 +++ + .../LoongArch/lasx/intrinsic-nori-non-imm.ll | 10 ++ + .../lasx/intrinsic-ori-invalid-imm.ll | 17 +++ + .../LoongArch/lasx/intrinsic-ori-non-imm.ll | 10 ++ + .../lasx/intrinsic-permi-invalid-imm.ll | 49 +++++++ + .../LoongArch/lasx/intrinsic-permi-non-imm.ll | 28 ++++ + .../lasx/intrinsic-pickve-invalid-imm.ll | 65 +++++++++ + .../lasx/intrinsic-pickve-non-imm.ll | 37 +++++ + .../lasx/intrinsic-pickve2gr-invalid-imm.ll | 65 +++++++++ + .../lasx/intrinsic-pickve2gr-non-imm.ll | 37 +++++ + .../lasx/intrinsic-repl128vei-invalid-imm.ll | 65 +++++++++ + .../lasx/intrinsic-repl128vei-non-imm.ll | 37 +++++ + .../lasx/intrinsic-rotr-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lasx/intrinsic-rotr-non-imm.ll | 37 +++++ + .../lasx/intrinsic-sat-invalid-imm.ll | 129 ++++++++++++++++++ + .../LoongArch/lasx/intrinsic-sat-non-imm.ll | 73 ++++++++++ + .../lasx/intrinsic-seq-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lasx/intrinsic-seq-non-imm.ll | 37 +++++ + .../lasx/intrinsic-shuf4i-invalid-imm.ll | 65 +++++++++ + .../lasx/intrinsic-shuf4i-non-imm.ll | 37 +++++ + .../lasx/intrinsic-sle-invalid-imm.ll | 129 ++++++++++++++++++ + .../LoongArch/lasx/intrinsic-sle-non-imm.ll | 73 ++++++++++ + .../lasx/intrinsic-sll-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lasx/intrinsic-sll-non-imm.ll | 37 +++++ + .../lasx/intrinsic-sllwil-invalid-imm.ll | 97 +++++++++++++ + .../lasx/intrinsic-sllwil-non-imm.ll | 55 ++++++++ + 
.../lasx/intrinsic-slt-invalid-imm.ll | 129 ++++++++++++++++++ + .../LoongArch/lasx/intrinsic-slt-non-imm.ll | 73 ++++++++++ + .../lasx/intrinsic-sra-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lasx/intrinsic-sra-non-imm.ll | 37 +++++ + .../lasx/intrinsic-srani-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lasx/intrinsic-srani-non-imm.ll | 37 +++++ + .../lasx/intrinsic-srar-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lasx/intrinsic-srar-non-imm.ll | 37 +++++ + .../lasx/intrinsic-srarni-invalid-imm.ll | 65 +++++++++ + .../lasx/intrinsic-srarni-non-imm.ll | 37 +++++ + .../lasx/intrinsic-srl-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lasx/intrinsic-srl-non-imm.ll | 37 +++++ + .../lasx/intrinsic-srlni-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lasx/intrinsic-srlni-non-imm.ll | 37 +++++ + .../lasx/intrinsic-srlr-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lasx/intrinsic-srlr-non-imm.ll | 37 +++++ + .../lasx/intrinsic-srlrni-invalid-imm.ll | 65 +++++++++ + .../lasx/intrinsic-srlrni-non-imm.ll | 37 +++++ + .../lasx/intrinsic-ssrani-invalid-imm.ll | 129 ++++++++++++++++++ + .../lasx/intrinsic-ssrani-non-imm.ll | 73 ++++++++++ + .../lasx/intrinsic-ssrarni-invalid-imm.ll | 129 ++++++++++++++++++ + .../lasx/intrinsic-ssrarni-non-imm.ll | 73 ++++++++++ + .../lasx/intrinsic-ssrlni-invalid-imm.ll | 129 ++++++++++++++++++ + .../lasx/intrinsic-ssrlni-non-imm.ll | 73 ++++++++++ + .../lasx/intrinsic-ssrlrni-invalid-imm.ll | 129 ++++++++++++++++++ + .../lasx/intrinsic-ssrlrni-non-imm.ll | 73 ++++++++++ + .../lasx/intrinsic-st-invalid-imm.ll | 17 +++ + .../LoongArch/lasx/intrinsic-st-non-imm.ll | 10 ++ + .../lasx/intrinsic-stelm-invalid-imm.ll | 121 ++++++++++++++++ + .../LoongArch/lasx/intrinsic-stelm-non-imm.ll | 65 +++++++++ + .../lasx/intrinsic-subi-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lasx/intrinsic-subi-non-imm.ll | 37 +++++ + .../lasx/intrinsic-xori-invalid-imm.ll | 17 +++ + .../LoongArch/lasx/intrinsic-xori-non-imm.ll | 10 ++ + 94 files changed, 5003 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-non-imm.ll + create mode 100644 
llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-invalid-imm.ll + create mode 
100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-non-imm.ll + +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-invalid-imm.ll +new file mode 100644 +index 000000000000..4998847f0910 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvaddi_bu_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvaddi.bu: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvaddi_bu_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvaddi.bu: argument out of range ++entry: ++ 
%res = call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> %va, i32 32) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvaddi_hu_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvaddi.hu: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvaddi_hu_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvaddi.hu: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> %va, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvaddi_wu_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvaddi.wu: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvaddi_wu_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvaddi.wu: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> %va, i32 32) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvaddi_du_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvaddi.du: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvaddi_du_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvaddi.du: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> %va, i32 32) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-non-imm.ll +new file mode 100644 +index 000000000000..f25f0e61a28e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvaddi_bu(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvaddi_hu(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvaddi_wu(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvaddi_du(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-invalid-imm.ll +new 
file mode 100644 +index 000000000000..60f0b765f954 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvandi_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvandi.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvandi_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvandi.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> %va, i32 256) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-non-imm.ll +new file mode 100644 +index 000000000000..1273dc6b450b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvandi_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-invalid-imm.ll +new file mode 100644 +index 000000000000..ecc287e89bbc +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbitclri_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitclri.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvbitclri_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitclri.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> %va, i32 8) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvbitclri_h_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitclri.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvbitclri_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitclri.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> %va, i32 16) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvbitclri_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitclri.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvbitclri_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitclri.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> %va, i32 32) ++ 
ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvbitclri_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitclri.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvbitclri_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitclri.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> %va, i32 64) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-non-imm.ll +new file mode 100644 +index 000000000000..09da85411082 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbitclri_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvbitclri_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvbitclri_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvbitclri_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-invalid-imm.ll +new file mode 100644 +index 000000000000..dff0884fdd5a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbitrevi_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitrevi.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvbitrevi_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitrevi.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> %va, i32 8) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvbitrevi_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitrevi.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> 
@lasx_xvbitrevi_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitrevi.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> %va, i32 16) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvbitrevi_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitrevi.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvbitrevi_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitrevi.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> %va, i32 32) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvbitrevi_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitrevi.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvbitrevi_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitrevi.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> %va, i32 64) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-non-imm.ll +new file mode 100644 +index 000000000000..e1aef1a82f0c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbitrevi_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvbitrevi_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvbitrevi_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvbitrevi_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-invalid-imm.ll +new file mode 100644 +index 000000000000..3f6fd44f842c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbitseli_b_lo(<32 x i8> %va, 
<32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitseli.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> %va, <32 x i8> %vb, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvbitseli_b_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitseli.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> %va, <32 x i8> %vb, i32 256) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-non-imm.ll +new file mode 100644 +index 000000000000..40533ab96d86 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbitseli_b(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> %va, <32 x i8> %vb, i32 %c) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-invalid-imm.ll +new file mode 100644 +index 000000000000..17a77ece7775 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbitseti_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitseti.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvbitseti_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitseti.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> %va, i32 8) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvbitseti_h_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitseti.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvbitseti_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitseti.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> %va, i32 16) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvbitseti_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitseti.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvbitseti_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitseti.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> %va, i32 32) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvbitseti_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitseti.d: argument out 
of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvbitseti_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitseti.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> %va, i32 64) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-non-imm.ll +new file mode 100644 +index 000000000000..613285804e0e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbitseti_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvbitseti_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvbitseti_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvbitseti_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-invalid-imm.ll +new file mode 100644 +index 000000000000..1da08a633bd2 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbsll_v_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbsll.v: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvbsll_v_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbsll.v: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> %va, i32 32) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-non-imm.ll +new file mode 100644 +index 000000000000..e19a3232c179 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbsll_v(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> 
@llvm.loongarch.lasx.xvbsll.v(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-invalid-imm.ll +new file mode 100644 +index 000000000000..5d2b63391e67 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbsrl_v_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbsrl.v: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvbsrl_v_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbsrl.v: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> %va, i32 32) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-non-imm.ll +new file mode 100644 +index 000000000000..8dfd0ca579b8 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbsrl_v(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-invalid-imm.ll +new file mode 100644 +index 000000000000..1301b8a146eb +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvextrins_b_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvextrins.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> %va, <32 x i8> %vb, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvextrins_b_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvextrins.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> %va, <32 x i8> %vb, i32 256) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvextrins_h_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvextrins.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> %va, <16 x i16> %vb, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvextrins_h_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvextrins.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> %va, <16 x i16> %vb, i32 256) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvextrins_w_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; 
CHECK: llvm.loongarch.lasx.xvextrins.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> %va, <8 x i32> %vb, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvextrins_w_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvextrins.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> %va, <8 x i32> %vb, i32 256) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvextrins_d_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvextrins.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> %va, <4 x i64> %vb, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvextrins_d_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvextrins.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> %va, <4 x i64> %vb, i32 256) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-non-imm.ll +new file mode 100644 +index 000000000000..bca8f8b3c778 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvextrins_b(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> %va, <32 x i8> %vb, i32 %c) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvextrins_h(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> %va, <16 x i16> %vb, i32 %c) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvextrins_w(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> %va, <8 x i32> %vb, i32 %c) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvextrins_d(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> %va, <4 x i64> %vb, i32 %c) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-invalid-imm.ll +new file mode 100644 +index 000000000000..64b4632669d2 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-invalid-imm.ll +@@ -0,0 +1,33 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvfrstpi_b_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvfrstpi.b: argument out of range 
++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> %va, <32 x i8> %vb, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvfrstpi_b_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvfrstpi.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> %va, <32 x i8> %vb, i32 32) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvfrstpi_h_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvfrstpi.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> %va, <16 x i16> %vb, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvfrstpi_h_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvfrstpi.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> %va, <16 x i16> %vb, i32 32) ++ ret <16 x i16> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-non-imm.ll +new file mode 100644 +index 000000000000..ca92cff9b2d1 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-non-imm.ll +@@ -0,0 +1,19 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvfrstpi_b(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> %va, <32 x i8> %vb, i32 %c) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvfrstpi_h(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> %va, <16 x i16> %vb, i32 %c) ++ ret <16 x i16> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-invalid-imm.ll +new file mode 100644 +index 000000000000..4982f2c7d43a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-invalid-imm.ll +@@ -0,0 +1,33 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32>, i32, i32) ++ ++define <8 x i32> @lasx_xvinsgr2vr_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvinsgr2vr.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> %va, i32 1, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvinsgr2vr_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvinsgr2vr.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> %va, i32 1, i32 8) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64>, i64, i32) ++ ++define <4 x i64> @lasx_xvinsgr2vr_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvinsgr2vr.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> %va, i64 1, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvinsgr2vr_d_hi(<4 x i64> %va) nounwind { ++; 
CHECK: llvm.loongarch.lasx.xvinsgr2vr.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> %va, i64 1, i32 4) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-non-imm.ll +new file mode 100644 +index 000000000000..3accabf6dbd9 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-non-imm.ll +@@ -0,0 +1,19 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32>, i32, i32) ++ ++define <8 x i32> @lasx_xvinsgr2vr_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> %va, i32 1, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64>, i64, i32) ++ ++define <4 x i64> @lasx_xvinsgr2vr_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> %va, i64 1, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-invalid-imm.ll +new file mode 100644 +index 000000000000..a54fa8515fba +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-invalid-imm.ll +@@ -0,0 +1,33 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvinsve0_w_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvinsve0.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> %va, <8 x i32> %vb, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvinsve0_w_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvinsve0.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> %va, <8 x i32> %vb, i32 8) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvinsve0_d_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvinsve0.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> %va, <4 x i64> %vb, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvinsve0_d_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvinsve0.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> %va, <4 x i64> %vb, i32 4) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-non-imm.ll +new file mode 100644 +index 000000000000..53e59db11aa6 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-non-imm.ll +@@ -0,0 +1,19 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvinsve0_w(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x 
i32> %va, <8 x i32> %vb, i32 %c) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvinsve0_d(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> %va, <4 x i64> %vb, i32 %c) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-invalid-imm.ll +new file mode 100644 +index 000000000000..20dd8a45d7f0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvld(i8*, i32) ++ ++define <32 x i8> @lasx_xvld_lo(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvld: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvld(i8* %p, i32 -2049) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvld_hi(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvld: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvld(i8* %p, i32 2048) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-non-imm.ll +new file mode 100644 +index 000000000000..b23436a44832 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvld(i8*, i32) ++ ++define <32 x i8> @lasx_xvld(i8* %p, i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvld(i8* %p, i32 %a) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-invalid-imm.ll +new file mode 100644 +index 000000000000..f3dd3650cf8a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-invalid-imm.ll +@@ -0,0 +1,81 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvldi(i32) ++ ++define <4 x i64> @lasx_xvldi_lo() nounwind { ++; CHECK: llvm.loongarch.lasx.xvldi: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 -4097) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvldi_hi() nounwind { ++; CHECK: llvm.loongarch.lasx.xvldi: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 4096) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32) ++ ++define <32 x i8> @lasx_xvrepli_b_lo() nounwind { ++; CHECK: llvm.loongarch.lasx.xvrepli.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 -513) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvrepli_b_hi() nounwind { ++; CHECK: llvm.loongarch.lasx.xvrepli.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 512) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32) ++ ++define <16 x i16> @lasx_xvrepli_h_lo() nounwind { ++; CHECK: llvm.loongarch.lasx.xvrepli.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 -513) ++ ret <16 x i16> %res ++} ++ ++define 
<16 x i16> @lasx_xvrepli_h_hi() nounwind { ++; CHECK: llvm.loongarch.lasx.xvrepli.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 512) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32) ++ ++define <8 x i32> @lasx_xvrepli_w_lo() nounwind { ++; CHECK: llvm.loongarch.lasx.xvrepli.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 -513) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvrepli_w_hi() nounwind { ++; CHECK: llvm.loongarch.lasx.xvrepli.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 512) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32) ++ ++define <4 x i64> @lasx_xvrepli_d_lo() nounwind { ++; CHECK: llvm.loongarch.lasx.xvrepli.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 -513) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvrepli_d_hi() nounwind { ++; CHECK: llvm.loongarch.lasx.xvrepli.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 512) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-non-imm.ll +new file mode 100644 +index 000000000000..6466818bf674 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-non-imm.ll +@@ -0,0 +1,46 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvldi(i32) ++ ++define <4 x i64> @lasx_xvldi(i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 %a) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32) ++ ++define <32 x i8> @lasx_xvrepli_b(i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 %a) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32) ++ ++define <16 x i16> @lasx_xvrepli_h(i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 %a) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32) ++ ++define <8 x i32> @lasx_xvrepli_w(i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 %a) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32) ++ ++define <4 x i64> @lasx_xvrepli_d(i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 %a) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-invalid-imm.ll +new file mode 100644 +index 000000000000..cb62a839985a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8*, i32) ++ ++define <32 x i8> @lasx_xvldrepl_b_lo(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvldrepl.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8* %p, 
i32 -2049) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvldrepl_b_hi(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvldrepl.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8* %p, i32 2048) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8*, i32) ++ ++define <16 x i16> @lasx_xvldrepl_h_lo(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvldrepl.h: argument out of range or not a multiple of 2. ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8* %p, i32 -2050) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvldrepl_h_hi(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvldrepl.h: argument out of range or not a multiple of 2. ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8* %p, i32 2048) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8*, i32) ++ ++define <8 x i32> @lasx_xvldrepl_w_lo(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvldrepl.w: argument out of range or not a multiple of 4. ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8* %p, i32 -2052) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvldrepl_w_hi(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvldrepl.w: argument out of range or not a multiple of 4. ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8* %p, i32 2048) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8*, i32) ++ ++define <4 x i64> @lasx_xvldrepl_d_lo(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvldrepl.d: argument out of range or not a multiple of 8. ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8* %p, i32 -2056) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvldrepl_d_hi(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvldrepl.d: argument out of range or not a multiple of 8. 
++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8* %p, i32 2048) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-non-imm.ll +new file mode 100644 +index 000000000000..075d663b0dd7 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8*, i32) ++ ++define <32 x i8> @lasx_xvldrepl_b(i8* %p, i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8* %p, i32 %a) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8*, i32) ++ ++define <16 x i16> @lasx_xvldrepl_h(i8* %p, i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8* %p, i32 %a) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8*, i32) ++ ++define <8 x i32> @lasx_xvldrepl_w(i8* %p, i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8* %p, i32 %a) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8*, i32) ++ ++define <4 x i64> @lasx_xvldrepl_d(i8* %p, i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8* %p, i32 %a) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-invalid-imm.ll +new file mode 100644 +index 000000000000..a671e9979b2f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvmaxi_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmaxi.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> %va, i32 -17) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvmaxi_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmaxi.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> %va, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvmaxi_h_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmaxi.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> %va, i32 -17) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvmaxi_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmaxi.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> %va, i32 16) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvmaxi_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmaxi.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> %va, i32 -17) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvmaxi_w_hi(<8 x i32> %va) nounwind { ++; CHECK: 
llvm.loongarch.lasx.xvmaxi.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> %va, i32 16) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvmaxi_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmaxi.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> %va, i32 -17) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvmaxi_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmaxi.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> %va, i32 16) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvmaxi_bu_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmaxi.bu: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvmaxi_bu_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmaxi.bu: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> %va, i32 32) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvmaxi_hu_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmaxi.hu: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvmaxi_hu_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmaxi.hu: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> %va, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvmaxi_wu_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmaxi.wu: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvmaxi_wu_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmaxi.wu: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> %va, i32 32) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvmaxi_du_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmaxi.du: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvmaxi_du_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmaxi.du: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> %va, i32 32) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-non-imm.ll +new file mode 100644 +index 000000000000..b85798b53c92 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvmaxi_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call 
<32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvmaxi_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvmaxi_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvmaxi_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvmaxi_bu(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvmaxi_hu(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvmaxi_wu(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvmaxi_du(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-invalid-imm.ll +new file mode 100644 +index 000000000000..5ed4104c295f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvmini_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmini.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> %va, i32 -17) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvmini_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmini.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> %va, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvmini_h_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmini.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> %va, i32 -17) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> 
@lasx_xvmini_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmini.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> %va, i32 16) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvmini_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmini.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> %va, i32 -17) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvmini_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmini.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> %va, i32 16) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvmini_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmini.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> %va, i32 -17) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvmini_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmini.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> %va, i32 16) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvmini_bu_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmini.bu: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvmini_bu_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmini.bu: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> %va, i32 32) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvmini_hu_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmini.hu: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvmini_hu_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmini.hu: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> %va, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvmini_wu_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmini.wu: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvmini_wu_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmini.wu: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> %va, i32 32) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvmini_du_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmini.du: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvmini_du_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmini.du: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> %va, i32 
32) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-non-imm.ll +new file mode 100644 +index 000000000000..b81931977aad +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvmini_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvmini_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvmini_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvmini_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvmini_bu(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvmini_hu(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvmini_wu(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvmini_du(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-invalid-imm.ll +new file mode 100644 +index 000000000000..1130e094bf1f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvnori_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvnori.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> %va, i32 -1) ++ ret <32 x 
i8> %res ++} ++ ++define <32 x i8> @lasx_xvnori_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvnori.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> %va, i32 256) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-non-imm.ll +new file mode 100644 +index 000000000000..8f2333064d64 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvnori_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-invalid-imm.ll +new file mode 100644 +index 000000000000..90dec8e55f2d +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvori_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvori.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvori_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvori.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> %va, i32 256) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-non-imm.ll +new file mode 100644 +index 000000000000..ae6571d98f4a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvori_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-invalid-imm.ll +new file mode 100644 +index 000000000000..41f4856bd8f7 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-invalid-imm.ll +@@ -0,0 +1,49 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvpermi_w_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpermi.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> %va, <8 x i32> %vb, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvpermi_w_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpermi.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> %va, <8 x i32> %vb, i32 256) ++ ret <8 x i32> %res ++} ++ ++declare <4 x 
i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvpermi_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpermi.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvpermi_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpermi.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> %va, i32 256) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvpermi_q_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpermi.q: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> %va, <32 x i8> %vb, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvpermi_q_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpermi.q: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> %va, <32 x i8> %vb, i32 256) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-non-imm.ll +new file mode 100644 +index 000000000000..afb335c5d6ca +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-non-imm.ll +@@ -0,0 +1,28 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvpermi_w(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> %va, <8 x i32> %vb, i32 %c) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvpermi_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvpermi_q(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> %va, <32 x i8> %vb, i32 %c) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-invalid-imm.ll +new file mode 100644 +index 000000000000..cfc6ec42874e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvpickve_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpickve.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvpickve_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpickve.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> %va, i32 8) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> 
@llvm.loongarch.lasx.xvpickve.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvpickve_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpickve.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvpickve_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpickve.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> %va, i32 4) ++ ret <4 x i64> %res ++} ++ ++declare <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float>, i32) ++ ++define <8 x float> @lasx_xvpickve_w_f_lo(<8 x float> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpickve.w.f: argument out of range ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> %va, i32 -1) ++ ret <8 x float> %res ++} ++ ++define <8 x float> @lasx_xvpickve_w_f_hi(<8 x float> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpickve.w.f: argument out of range ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> %va, i32 8) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double>, i32) ++ ++define <4 x double> @lasx_xvpickve_d_f_lo(<4 x double> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpickve.d.f: argument out of range ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> %va, i32 -1) ++ ret <4 x double> %res ++} ++ ++define <4 x double> @lasx_xvpickve_d_f_hi(<4 x double> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpickve.d.f: argument out of range ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> %va, i32 4) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-non-imm.ll +new file mode 100644 +index 000000000000..be1f19a89737 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvpickve_w(<8 x i32> %va, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> %va, i32 %c) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvpickve_d(<4 x i64> %va, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> %va, i32 %c) ++ ret <4 x i64> %res ++} ++ ++declare <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float>, i32) ++ ++define <8 x float> @lasx_xvpickve_w_f(<8 x float> %va, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> %va, i32 %c) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double>, i32) ++ ++define <4 x double> @lasx_xvpickve_d_f(<4 x double> %va, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> %va, i32 %c) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-invalid-imm.ll 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-invalid-imm.ll +new file mode 100644 +index 000000000000..93056b272dfc +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32>, i32) ++ ++define i32 @lasx_xvpickve2gr_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpickve2gr.w: argument out of range ++entry: ++ %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> %va, i32 -1) ++ ret i32 %res ++} ++ ++define i32 @lasx_xvpickve2gr_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpickve2gr.w: argument out of range ++entry: ++ %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> %va, i32 8) ++ ret i32 %res ++} ++ ++declare i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64>, i32) ++ ++define i64 @lasx_xvpickve2gr_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpickve2gr.d: argument out of range ++entry: ++ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> %va, i32 -1) ++ ret i64 %res ++} ++ ++define i64 @lasx_xvpickve2gr_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpickve2gr.d: argument out of range ++entry: ++ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> %va, i32 4) ++ ret i64 %res ++} ++ ++declare i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32>, i32) ++ ++define i32 @lasx_xvpickve2gr_wu_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpickve2gr.wu: argument out of range ++entry: ++ %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> %va, i32 -1) ++ ret i32 %res ++} ++ ++define i32 @lasx_xvpickve2gr_wu_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpickve2gr.wu: argument out of range ++entry: ++ %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> %va, i32 8) ++ ret i32 %res ++} ++ ++declare i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64>, i32) ++ ++define i64 @lasx_xvpickve2gr_du_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpickve2gr.du: argument out of range ++entry: ++ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> %va, i32 -1) ++ ret i64 %res ++} ++ ++define i64 @lasx_xvpickve2gr_du_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpickve2gr.du: argument out of range ++entry: ++ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> %va, i32 4) ++ ret i64 %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-non-imm.ll +new file mode 100644 +index 000000000000..0fa8c94adc60 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32>, i32) ++ ++define i32 @lasx_xvpickve2gr_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> %va, i32 %b) ++ ret i32 %res ++} ++ ++declare i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64>, i32) ++ ++define i64 @lasx_xvpickve2gr_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> %va, i32 %b) ++ ret i64 %res ++} ++ ++declare i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32>, i32) 
++ ++define i32 @lasx_xvpickve2gr_wu(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> %va, i32 %b) ++ ret i32 %res ++} ++ ++declare i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64>, i32) ++ ++define i64 @lasx_xvpickve2gr_du(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> %va, i32 %b) ++ ret i64 %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-invalid-imm.ll +new file mode 100644 +index 000000000000..a0cb309c54e1 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvrepl128vei_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvrepl128vei.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvrepl128vei_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvrepl128vei.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> %va, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvrepl128vei_h_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvrepl128vei.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvrepl128vei_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvrepl128vei.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> %va, i32 8) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvrepl128vei_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvrepl128vei.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvrepl128vei_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvrepl128vei.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> %va, i32 4) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvrepl128vei_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvrepl128vei.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvrepl128vei_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvrepl128vei.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> %va, i32 2) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-non-imm.ll +new file mode 100644 +index 000000000000..c537ffa66ba7 +--- /dev/null ++++ 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvrepl128vei_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvrepl128vei_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvrepl128vei_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvrepl128vei_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-invalid-imm.ll +new file mode 100644 +index 000000000000..40abdf497605 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvrotri_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvrotri.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvrotri_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvrotri.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> %va, i32 8) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvrotri_h_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvrotri.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvrotri_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvrotri.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> %va, i32 16) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvrotri_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvrotri.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvrotri_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvrotri.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> %va, i32 32) ++ ret <8 x i32> %res ++} ++ ++declare 
<4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvrotri_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvrotri.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvrotri_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvrotri.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> %va, i32 64) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-non-imm.ll +new file mode 100644 +index 000000000000..dd38301d0534 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvrotri_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvrotri_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvrotri_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvrotri_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-invalid-imm.ll +new file mode 100644 +index 000000000000..839fbc9990d3 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsat_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsat.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvsat_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsat.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> %va, i32 8) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsat_h_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsat.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvsat_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsat.h: argument out of range ++entry: ++ %res = call <16 
x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> %va, i32 16) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsat_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsat.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvsat_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsat.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> %va, i32 32) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsat_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsat.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvsat_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsat.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> %va, i32 64) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsat_bu_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsat.bu: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvsat_bu_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsat.bu: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> %va, i32 8) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsat_hu_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsat.hu: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvsat_hu_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsat.hu: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> %va, i32 16) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsat_wu_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsat.wu: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvsat_wu_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsat.wu: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> %va, i32 32) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsat_du_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsat.du: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvsat_du_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsat.du: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> %va, i32 64) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-non-imm.ll +new file mode 
100644 +index 000000000000..b73b32ebd3b0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsat_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsat_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsat_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsat_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsat_bu(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsat_hu(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsat_wu(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsat_du(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-invalid-imm.ll +new file mode 100644 +index 000000000000..bb6ef0cc6574 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvseqi_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvseqi.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> %va, i32 -17) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvseqi_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvseqi.b: argument out of range ++entry: ++ %res = call <32 x i8> 
@llvm.loongarch.lasx.xvseqi.b(<32 x i8> %va, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvseqi_h_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvseqi.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> %va, i32 -17) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvseqi_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvseqi.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> %va, i32 16) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvseqi_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvseqi.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> %va, i32 -17) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvseqi_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvseqi.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> %va, i32 16) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvseqi_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvseqi.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> %va, i32 -17) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvseqi_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvseqi.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> %va, i32 16) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-non-imm.ll +new file mode 100644 +index 000000000000..fb2c6206da7b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvseqi_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvseqi_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvseqi_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvseqi_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-invalid-imm.ll +new file mode 100644 +index 000000000000..9217d1f6a05d 
+--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvshuf4i_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvshuf4i.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvshuf4i_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvshuf4i.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> %va, i32 256) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvshuf4i_h_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvshuf4i.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvshuf4i_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvshuf4i.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> %va, i32 256) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvshuf4i_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvshuf4i.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvshuf4i_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvshuf4i.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> %va, i32 256) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvshuf4i_d_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvshuf4i.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> %va, <4 x i64> %vb, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvshuf4i_d_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvshuf4i.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> %va, <4 x i64> %vb, i32 256) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-non-imm.ll +new file mode 100644 +index 000000000000..8d6d1c694193 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvshuf4i_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvshuf4i_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ 
++declare <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvshuf4i_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvshuf4i_d(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> %va, <4 x i64> %vb, i32 %c) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-invalid-imm.ll +new file mode 100644 +index 000000000000..5b10aca9801d +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvslei_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslei.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> %va, i32 -17) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvslei_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslei.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> %va, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvslei_h_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslei.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> %va, i32 -17) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvslei_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslei.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> %va, i32 16) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvslei_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslei.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> %va, i32 -17) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvslei_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslei.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> %va, i32 16) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvslei_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslei.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> %va, i32 -17) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvslei_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslei.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> %va, i32 16) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvslei_bu_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslei.bu: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> %va, i32 
-1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvslei_bu_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslei.bu: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> %va, i32 32) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvslei_hu_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslei.hu: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvslei_hu_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslei.hu: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> %va, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvslei_wu_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslei.wu: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvslei_wu_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslei.wu: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> %va, i32 32) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvslei_du_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslei.du: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvslei_du_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslei.du: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> %va, i32 32) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-non-imm.ll +new file mode 100644 +index 000000000000..903bc10d88b7 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvslei_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvslei_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvslei_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvslei_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> %va, i32 %b) ++ ret <4 x 
i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvslei_bu(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvslei_hu(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvslei_wu(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvslei_du(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-invalid-imm.ll +new file mode 100644 +index 000000000000..bf8205376a6c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvslli_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslli.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvslli_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslli.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> %va, i32 8) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvslli_h_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslli.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvslli_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslli.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> %va, i32 16) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvslli_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslli.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvslli_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslli.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> %va, i32 32) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvslli_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslli.d: argument out of range ++entry: ++ %res = call <4 x i64> 
@llvm.loongarch.lasx.xvslli.d(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvslli_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslli.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> %va, i32 64) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-non-imm.ll +new file mode 100644 +index 000000000000..b5368a86b5c3 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvslli_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvslli_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvslli_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvslli_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-invalid-imm.ll +new file mode 100644 +index 000000000000..18803767d6c0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-invalid-imm.ll +@@ -0,0 +1,97 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8>, i32) ++ ++define <16 x i16> @lasx_xvsllwil_h_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsllwil.h.b: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvsllwil_h_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsllwil.h.b: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> %va, i32 8) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16>, i32) ++ ++define <8 x i32> @lasx_xvsllwil_w_h_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsllwil.w.h: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvsllwil_w_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsllwil.w.h: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> %va, i32 16) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32>, i32) ++ 
++define <4 x i64> @lasx_xvsllwil_d_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsllwil.d.w: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvsllwil_d_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsllwil.d.w: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> %va, i32 32) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8>, i32) ++ ++define <16 x i16> @lasx_xvsllwil_hu_bu_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsllwil.hu.bu: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvsllwil_hu_bu_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsllwil.hu.bu: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> %va, i32 8) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16>, i32) ++ ++define <8 x i32> @lasx_xvsllwil_wu_hu_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsllwil.wu.hu: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvsllwil_wu_hu_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsllwil.wu.hu: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> %va, i32 16) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32>, i32) ++ ++define <4 x i64> @lasx_xvsllwil_du_wu_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsllwil.du.wu: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvsllwil_du_wu_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsllwil.du.wu: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> %va, i32 32) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-non-imm.ll +new file mode 100644 +index 000000000000..3f5d4d631671 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-non-imm.ll +@@ -0,0 +1,55 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8>, i32) ++ ++define <16 x i16> @lasx_xvsllwil_h_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16>, i32) ++ ++define <8 x i32> @lasx_xvsllwil_w_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32>, i32) ++ ++define <4 x i64> @lasx_xvsllwil_d_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> 
@llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> %va, i32 %b) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8>, i32) ++ ++define <16 x i16> @lasx_xvsllwil_hu_bu(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16>, i32) ++ ++define <8 x i32> @lasx_xvsllwil_wu_hu(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32>, i32) ++ ++define <4 x i64> @lasx_xvsllwil_du_wu(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-invalid-imm.ll +new file mode 100644 +index 000000000000..dc0567da4e47 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvslti_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslti.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> %va, i32 -17) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvslti_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslti.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> %va, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvslti_h_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslti.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> %va, i32 -17) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvslti_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslti.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> %va, i32 16) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvslti_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslti.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> %va, i32 -17) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvslti_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslti.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> %va, i32 16) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvslti_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslti.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> %va, i32 -17) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvslti_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslti.d: argument out of 
range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> %va, i32 16) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvslti_bu_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslti.bu: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvslti_bu_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslti.bu: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> %va, i32 32) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvslti_hu_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslti.hu: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvslti_hu_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslti.hu: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> %va, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvslti_wu_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslti.wu: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvslti_wu_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslti.wu: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> %va, i32 32) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvslti_du_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslti.du: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvslti_du_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslti.du: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> %va, i32 32) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-non-imm.ll +new file mode 100644 +index 000000000000..a2cedc8d3ef3 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvslti_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvslti_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvslti_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter 
++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvslti_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvslti_bu(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvslti_hu(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvslti_wu(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvslti_du(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-invalid-imm.ll +new file mode 100644 +index 000000000000..15b522d5e7e3 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrai_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrai.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvsrai_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrai.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> %va, i32 8) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrai_h_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrai.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvsrai_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrai.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> %va, i32 16) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrai_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrai.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvsrai_w_hi(<8 x i32> %va) nounwind { ++; CHECK: 
llvm.loongarch.lasx.xvsrai.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> %va, i32 32) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrai_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrai.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvsrai_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrai.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> %va, i32 64) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-non-imm.ll +new file mode 100644 +index 000000000000..fefee7246ae6 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrai_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrai_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrai_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrai_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-invalid-imm.ll +new file mode 100644 +index 000000000000..bedbfc4889d2 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrani_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrani.b.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvsrani_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrani.b.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrani_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: 
llvm.loongarch.lasx.xvsrani.h.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvsrani_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrani.h.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrani_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrani.w.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvsrani_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrani.w.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrani_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrani.d.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvsrani_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrani.d.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-non-imm.ll +new file mode 100644 +index 000000000000..3c17f2b6090a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrani_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrani_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrani_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrani_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) ++ 
ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-invalid-imm.ll +new file mode 100644 +index 000000000000..e417e3cc5bbf +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrari_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrari.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvsrari_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrari.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> %va, i32 8) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrari_h_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrari.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvsrari_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrari.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> %va, i32 16) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrari_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrari.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvsrari_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrari.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> %va, i32 32) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrari_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrari.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvsrari_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrari.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> %va, i32 64) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-non-imm.ll +new file mode 100644 +index 000000000000..15fed7966f1c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrari_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrari_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call 
<16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrari_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrari_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-invalid-imm.ll +new file mode 100644 +index 000000000000..83e977827e2d +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrarni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrarni.b.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvsrarni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrarni.b.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrarni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrarni.h.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvsrarni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrarni.h.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrarni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrarni.w.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvsrarni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrarni.w.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrarni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrarni.d.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvsrarni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: 
llvm.loongarch.lasx.xvsrarni.d.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-non-imm.ll +new file mode 100644 +index 000000000000..eb577a29fb33 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrarni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrarni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrarni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrarni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-invalid-imm.ll +new file mode 100644 +index 000000000000..3ab02dcb97ed +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrli_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrli.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvsrli_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrli.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> %va, i32 8) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrli_h_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrli.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvsrli_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrli.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> %va, i32 16) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> 
@llvm.loongarch.lasx.xvsrli.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrli_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrli.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvsrli_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrli.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> %va, i32 32) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrli_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrli.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvsrli_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrli.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> %va, i32 64) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-non-imm.ll +new file mode 100644 +index 000000000000..bc085aeaa232 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrli_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrli_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrli_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrli_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-invalid-imm.ll +new file mode 100644 +index 000000000000..9e7c94305630 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrlni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlni.b.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvsrlni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: 
llvm.loongarch.lasx.xvsrlni.b.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrlni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlni.h.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvsrlni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlni.h.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrlni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlni.w.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvsrlni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlni.w.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrlni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlni.d.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvsrlni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlni.d.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-non-imm.ll +new file mode 100644 +index 000000000000..66d800470003 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrlni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrlni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrlni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> %va, <8 x i32> 
%vb, i32 %c) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrlni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-invalid-imm.ll +new file mode 100644 +index 000000000000..52621ddc6f49 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrlri_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlri.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvsrlri_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlri.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> %va, i32 8) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrlri_h_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlri.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvsrlri_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlri.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> %va, i32 16) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrlri_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlri.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvsrlri_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlri.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> %va, i32 32) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrlri_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlri.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvsrlri_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlri.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> %va, i32 64) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-non-imm.ll +new file mode 100644 +index 000000000000..5663e3475b12 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrlri_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg 
operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrlri_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrlri_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrlri_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-invalid-imm.ll +new file mode 100644 +index 000000000000..2d65a75b175a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrlrni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlrni.b.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvsrlrni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlrni.b.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrlrni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlrni.h.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvsrlrni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlrni.h.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrlrni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlrni.w.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvsrlrni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlrni.w.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> 
@lasx_xvsrlrni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlrni.d.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvsrlrni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlrni.d.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-non-imm.ll +new file mode 100644 +index 000000000000..82da0d21d013 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrlrni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrlrni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrlrni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrlrni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-invalid-imm.ll +new file mode 100644 +index 000000000000..e10d5d7bd488 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrani_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrani.b.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvssrani_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrani.b.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrani_h_w_lo(<16 x i16> 
%va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrani.h.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvssrani_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrani.h.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrani_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrani.w.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvssrani_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrani.w.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrani_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrani.d.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvssrani_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrani.d.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrani_bu_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrani.bu.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvssrani_bu_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrani.bu.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrani_hu_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrani.hu.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvssrani_hu_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrani.hu.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrani_wu_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrani.wu.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x 
i32> @lasx_xvssrani_wu_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrani.wu.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 64) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrani_du_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrani.du.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvssrani_du_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrani.du.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> %va, <4 x i64> %vb, i32 128) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-non-imm.ll +new file mode 100644 +index 000000000000..a928cc2de8c8 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrani_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrani_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrani_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrani_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrani_bu_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrani_hu_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x 
i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrani_wu_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrani_du_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-invalid-imm.ll +new file mode 100644 +index 000000000000..42cd6ac99754 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrarni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrarni.b.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvssrarni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrarni.b.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrarni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrarni.h.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvssrarni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrarni.h.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrarni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrarni.w.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvssrarni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrarni.w.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrarni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrarni.d.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvssrarni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrarni.d.q: 
argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrarni_bu_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrarni.bu.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvssrarni_bu_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrarni.bu.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrarni_hu_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrarni.hu.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvssrarni_hu_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrarni.hu.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrarni_wu_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrarni.wu.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvssrarni_wu_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrarni.wu.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 64) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrarni_du_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrarni.du.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvssrarni_du_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrarni.du.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 128) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-non-imm.ll +new file mode 100644 +index 000000000000..f050e7d79b0f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrarni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) ++ 
ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrarni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrarni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrarni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrarni_bu_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrarni_hu_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrarni_wu_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrarni_du_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-invalid-imm.ll +new file mode 100644 +index 000000000000..26be21a83aa4 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrlni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlni.b.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvssrlni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlni.b.h: argument out of range ++entry: ++ %res = call <32 x i8> 
@llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrlni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlni.h.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvssrlni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlni.h.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrlni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlni.w.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvssrlni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlni.w.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrlni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlni.d.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvssrlni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlni.d.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrlni_bu_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlni.bu.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvssrlni_bu_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlni.bu.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrlni_hu_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlni.hu.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvssrlni_hu_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlni.hu.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrlni_wu_d_lo(<8 x i32> %va, <8 
x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlni.wu.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvssrlni_wu_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlni.wu.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 64) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrlni_du_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlni.du.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvssrlni_du_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlni.du.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 128) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-non-imm.ll +new file mode 100644 +index 000000000000..72da2a746dd5 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrlni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrlni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrlni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrlni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrlni_bu_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrlni_hu_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { ++; CHECK: 
immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrlni_wu_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrlni_du_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-invalid-imm.ll +new file mode 100644 +index 000000000000..cd778e2c0627 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrlrni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlrni.b.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvssrlrni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlrni.b.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrlrni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlrni.h.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvssrlrni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlrni.h.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrlrni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlrni.w.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvssrlrni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlrni.w.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrlrni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlrni.d.q: argument out of range ++entry: ++ %res = call <4 
x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvssrlrni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlrni.d.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrlrni_bu_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlrni.bu.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvssrlrni_bu_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlrni.bu.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrlrni_hu_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlrni.hu.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvssrlrni_hu_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlrni.hu.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrlrni_wu_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlrni.wu.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvssrlrni_wu_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlrni.wu.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 64) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrlrni_du_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlrni.du.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvssrlrni_du_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlrni.du.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 128) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-non-imm.ll +new file mode 100644 +index 000000000000..a10c54329149 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x 
i8> @lasx_xvssrlrni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrlrni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrlrni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrlrni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrlrni_bu_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrlrni_hu_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrlrni_wu_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrlrni_du_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-invalid-imm.ll +new file mode 100644 +index 000000000000..0177f2b77b93 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare void @llvm.loongarch.lasx.xvst(<32 x i8>, i8*, i32) ++ ++define void @lasx_xvst_lo(<32 x i8> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvst: argument out of range ++entry: ++ call void @llvm.loongarch.lasx.xvst(<32 x i8> %va, i8* %p, i32 -2049) ++ ret void ++} ++ ++define void @lasx_xvst_hi(<32 x i8> %va, i8* %p) 
nounwind { ++; CHECK: llvm.loongarch.lasx.xvst: argument out of range ++entry: ++ call void @llvm.loongarch.lasx.xvst(<32 x i8> %va, i8* %p, i32 2048) ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-non-imm.ll +new file mode 100644 +index 000000000000..c19207aad6b8 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare void @llvm.loongarch.lasx.xvst(<32 x i8>, i8*, i32) ++ ++define void @lasx_xvst(<32 x i8> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lasx.xvst(<32 x i8> %va, i8* %p, i32 %b) ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-invalid-imm.ll +new file mode 100644 +index 000000000000..0ea2484e090d +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-invalid-imm.ll +@@ -0,0 +1,121 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare void @llvm.loongarch.lasx.xvstelm.b(<32 x i8>, i8*, i32, i32) ++ ++define void @lasx_xvstelm_b_lo(<32 x i8> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvstelm.b: argument out of range ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 -129, i32 1) ++ ret void ++} ++ ++define void @lasx_xvstelm_b_hi(<32 x i8> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvstelm.b: argument out of range ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 128, i32 1) ++ ret void ++} ++ ++define void @lasx_xvstelm_b_idx_lo(<32 x i8> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvstelm.b: argument out of range ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 1, i32 -1) ++ ret void ++} ++ ++define void @lasx_xvstelm_b_idx_hi(<32 x i8> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvstelm.b: argument out of range ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 1, i32 32) ++ ret void ++} ++ ++declare void @llvm.loongarch.lasx.xvstelm.h(<16 x i16>, i8*, i32, i32) ++ ++define void @lasx_xvstelm_h_lo(<16 x i16> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvstelm.h: argument out of range or not a multiple of 2. ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 -258, i32 1) ++ ret void ++} ++ ++define void @lasx_xvstelm_h_hi(<16 x i16> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvstelm.h: argument out of range or not a multiple of 2. ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 256, i32 1) ++ ret void ++} ++ ++define void @lasx_xvstelm_h_idx_lo(<16 x i16> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvstelm.h: argument out of range or not a multiple of 2. ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 2, i32 -1) ++ ret void ++} ++ ++define void @lasx_xvstelm_h_idx_hi(<16 x i16> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvstelm.h: argument out of range or not a multiple of 2. 
++entry: ++ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 2, i32 16) ++ ret void ++} ++ ++declare void @llvm.loongarch.lasx.xvstelm.w(<8 x i32>, i8*, i32, i32) ++ ++define void @lasx_xvstelm_w_lo(<8 x i32> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvstelm.w: argument out of range or not a multiple of 4. ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 -516, i32 1) ++ ret void ++} ++ ++define void @lasx_xvstelm_w_hi(<8 x i32> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvstelm.w: argument out of range or not a multiple of 4. ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 512, i32 1) ++ ret void ++} ++ ++define void @lasx_xvstelm_w_idx_lo(<8 x i32> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvstelm.w: argument out of range or not a multiple of 4. ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 4, i32 -1) ++ ret void ++} ++ ++define void @lasx_xvstelm_w_idx_hi(<8 x i32> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvstelm.w: argument out of range or not a multiple of 4. ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 4, i32 8) ++ ret void ++} ++ ++declare void @llvm.loongarch.lasx.xvstelm.d(<4 x i64>, i8*, i32, i32) ++ ++define void @lasx_xvstelm_d_lo(<4 x i64> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvstelm.d: argument out of range or not a multiple of 8. ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 -1032, i32 1) ++ ret void ++} ++ ++define void @lasx_xvstelm_d_hi(<4 x i64> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvstelm.d: argument out of range or not a multiple of 8. ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 1024, i32 1) ++ ret void ++} ++ ++define void @lasx_xvstelm_d_idx_lo(<4 x i64> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvstelm.d: argument out of range or not a multiple of 8. ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 8, i32 -1) ++ ret void ++} ++ ++define void @lasx_xvstelm_d_idx_hi(<4 x i64> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvstelm.d: argument out of range or not a multiple of 8. 
++entry: ++ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 8, i32 4) ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-non-imm.ll +new file mode 100644 +index 000000000000..42c7c0da1746 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-non-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare void @llvm.loongarch.lasx.xvstelm.b(<32 x i8>, i8*, i32, i32) ++ ++define void @lasx_xvstelm_b(<32 x i8> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 %b, i32 1) ++ ret void ++} ++ ++define void @lasx_xvstelm_b_idx(<32 x i8> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 1, i32 %b) ++ ret void ++} ++ ++declare void @llvm.loongarch.lasx.xvstelm.h(<16 x i16>, i8*, i32, i32) ++ ++define void @lasx_xvstelm_h(<16 x i16> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 %b, i32 1) ++ ret void ++} ++ ++define void @lasx_xvstelm_h_idx(<16 x i16> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 2, i32 %b) ++ ret void ++} ++ ++declare void @llvm.loongarch.lasx.xvstelm.w(<8 x i32>, i8*, i32, i32) ++ ++define void @lasx_xvstelm_w(<8 x i32> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 %b, i32 1) ++ ret void ++} ++ ++define void @lasx_xvstelm_w_idx(<8 x i32> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 4, i32 %b) ++ ret void ++} ++ ++declare void @llvm.loongarch.lasx.xvstelm.d(<4 x i64>, i8*, i32, i32) ++ ++define void @lasx_xvstelm_d(<4 x i64> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 %b, i32 1) ++ ret void ++} ++ ++define void @lasx_xvstelm_d_idx(<4 x i64> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 8, i32 %b) ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-invalid-imm.ll +new file mode 100644 +index 000000000000..810008c17f7e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsubi_bu_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsubi.bu: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvsubi_bu_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsubi.bu: argument out of range ++entry: ++ %res = call <32 x i8> 
@llvm.loongarch.lasx.xvsubi.bu(<32 x i8> %va, i32 32) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsubi_hu_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsubi.hu: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvsubi_hu_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsubi.hu: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> %va, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsubi_wu_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsubi.wu: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvsubi_wu_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsubi.wu: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> %va, i32 32) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsubi_du_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsubi.du: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvsubi_du_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsubi.du: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> %va, i32 32) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-non-imm.ll +new file mode 100644 +index 000000000000..924b89ce9d6c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsubi_bu(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsubi_hu(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsubi_wu(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsubi_du(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-invalid-imm.ll +new file mode 100644 
+index 000000000000..0170d204cf42 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvxori_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvxori.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvxori_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvxori.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> %va, i32 256) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-non-imm.ll +new file mode 100644 +index 000000000000..1478f691a1cc +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvxori_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} +-- +2.20.1 + diff --git a/0006-LoongArch-Emit-function-call-code-sequence-as-PCADDU.patch b/0006-LoongArch-Emit-function-call-code-sequence-as-PCADDU.patch new file mode 100644 index 0000000..1cb026c --- /dev/null +++ b/0006-LoongArch-Emit-function-call-code-sequence-as-PCADDU.patch @@ -0,0 +1,442 @@ +From 416ff08426501d5f3c862e4b44a501a76a80be53 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Tue, 2 Jan 2024 10:55:02 +0800 +Subject: [PATCH 06/23] [LoongArch] Emit function call code sequence as + `PCADDU18I+JIRL` in medium code model + +According to the description of the psABI v2.20: +https://github.com/loongson/la-abi-specs/releases/tag/v2.20, adjustments +are made to the function call instructions under the medium code model. + +At the same time, AsmParser has already supported parsing the call36 and +tail36 macro instructions. 
+ +(cherry picked from commit 2cf420d5b846a4733ef0ef7c8ed0ae0bfd1c6772) +--- + .../AsmParser/LoongArchAsmParser.cpp | 61 +++++++++++++++++++ + .../LoongArch/LoongArchExpandPseudoInsts.cpp | 29 ++++----- + .../Target/LoongArch/LoongArchInstrInfo.td | 23 ++++++- + .../Target/LoongArch/LoongArchMCInstLower.cpp | 3 + + .../LoongArch/LoongArchTargetMachine.cpp | 4 +- + .../MCTargetDesc/LoongArchBaseInfo.h | 1 + + .../MCTargetDesc/LoongArchELFObjectWriter.cpp | 2 + + .../MCTargetDesc/LoongArchFixupKinds.h | 3 + + .../MCTargetDesc/LoongArchMCCodeEmitter.cpp | 3 + + .../MCTargetDesc/LoongArchMCExpr.cpp | 3 + + .../LoongArch/MCTargetDesc/LoongArchMCExpr.h | 1 + + llvm/test/CodeGen/LoongArch/code-models.ll | 12 ++-- + .../MC/LoongArch/Basic/Integer/invalid64.s | 2 +- + llvm/test/MC/LoongArch/Macros/macros-call.s | 9 +++ + .../MC/LoongArch/Relocations/relocations.s | 5 ++ + 15 files changed, 133 insertions(+), 28 deletions(-) + create mode 100644 llvm/test/MC/LoongArch/Macros/macros-call.s + +diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp +index a132e645c864..f908e5bc63d3 100644 +--- a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp ++++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp +@@ -122,6 +122,10 @@ class LoongArchAsmParser : public MCTargetAsmParser { + // Helper to emit pseudo instruction "li.w/d $rd, $imm". + void emitLoadImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out); + ++ // Helper to emit pseudo instruction "call36 sym" or "tail36 $rj, sym". ++ void emitFuncCall36(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, ++ bool IsTailCall); ++ + public: + enum LoongArchMatchResultTy { + Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY, +@@ -401,6 +405,22 @@ public: + IsValidKind; + } + ++ bool isSImm20pcaddu18i() const { ++ if (!isImm()) ++ return false; ++ ++ int64_t Imm; ++ LoongArchMCExpr::VariantKind VK = LoongArchMCExpr::VK_LoongArch_None; ++ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); ++ bool IsValidKind = VK == LoongArchMCExpr::VK_LoongArch_None || ++ VK == LoongArchMCExpr::VK_LoongArch_CALL36; ++ ++ return IsConstantImm ++ ? isInt<20>(Imm) && IsValidKind ++ : LoongArchAsmParser::classifySymbolRef(getImm(), VK) && ++ IsValidKind; ++ } ++ + bool isSImm21lsl2() const { + if (!isImm()) + return false; +@@ -1111,6 +1131,35 @@ void LoongArchAsmParser::emitLoadImm(MCInst &Inst, SMLoc IDLoc, + } + } + ++void LoongArchAsmParser::emitFuncCall36(MCInst &Inst, SMLoc IDLoc, ++ MCStreamer &Out, bool IsTailCall) { ++ // call36 sym ++ // expands to: ++ // pcaddu18i $ra, %call36(sym) ++ // jirl $ra, $ra, 0 ++ // ++ // tail36 $rj, sym ++ // expands to: ++ // pcaddu18i $rj, %call36(sym) ++ // jirl $r0, $rj, 0 ++ unsigned ScratchReg = ++ IsTailCall ? Inst.getOperand(0).getReg() : (unsigned)LoongArch::R1; ++ const MCExpr *Sym = ++ IsTailCall ? Inst.getOperand(1).getExpr() : Inst.getOperand(0).getExpr(); ++ const LoongArchMCExpr *LE = LoongArchMCExpr::create( ++ Sym, llvm::LoongArchMCExpr::VK_LoongArch_CALL36, getContext()); ++ ++ Out.emitInstruction( ++ MCInstBuilder(LoongArch::PCADDU18I).addReg(ScratchReg).addExpr(LE), ++ getSTI()); ++ Out.emitInstruction( ++ MCInstBuilder(LoongArch::JIRL) ++ .addReg(IsTailCall ? 
(unsigned)LoongArch::R0 : ScratchReg) ++ .addReg(ScratchReg) ++ .addImm(0), ++ getSTI()); ++} ++ + bool LoongArchAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, + OperandVector &Operands, + MCStreamer &Out) { +@@ -1159,6 +1208,12 @@ bool LoongArchAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, + case LoongArch::PseudoLI_D: + emitLoadImm(Inst, IDLoc, Out); + return false; ++ case LoongArch::PseudoCALL36: ++ emitFuncCall36(Inst, IDLoc, Out, /*IsTailCall=*/false); ++ return false; ++ case LoongArch::PseudoTAIL36: ++ emitFuncCall36(Inst, IDLoc, Out, /*IsTailCall=*/true); ++ return false; + } + Out.emitInstruction(Inst, getSTI()); + return false; +@@ -1440,6 +1495,12 @@ bool LoongArchAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + /*Upper=*/(1 << 19) - 1, + "operand must be a symbol with modifier (e.g. %pc_hi20) or an integer " + "in the range"); ++ case Match_InvalidSImm20pcaddu18i: ++ return generateImmOutOfRangeError( ++ Operands, ErrorInfo, /*Lower=*/-(1 << 19), ++ /*Upper=*/(1 << 19) - 1, ++ "operand must be a symbol with modifier (e.g. %call36) or an integer " ++ "in the range"); + case Match_InvalidSImm21lsl2: + return generateImmOutOfRangeError( + Operands, ErrorInfo, /*Lower=*/-(1 << 22), /*Upper=*/(1 << 22) - 4, +diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp +index 72c1f1cec198..8eda2dcc1633 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp +@@ -458,11 +458,11 @@ bool LoongArchPreRAExpandPseudo::expandFunctionCALL( + } + case CodeModel::Medium: { + // CALL: +- // pcalau12i $ra, %pc_hi20(func) +- // jirl $ra, $ra, %pc_lo12(func) ++ // pcaddu18i $ra, %call36(func) ++ // jirl $ra, $ra, 0 + // TAIL: +- // pcalau12i $scratch, %pc_hi20(func) +- // jirl $r0, $scratch, %pc_lo12(func) ++ // pcaddu18i $scratch, %call36(func) ++ // jirl $r0, $scratch, 0 + Opcode = + IsTailCall ? LoongArch::PseudoJIRL_TAIL : LoongArch::PseudoJIRL_CALL; + Register ScratchReg = +@@ -470,18 +470,15 @@ bool LoongArchPreRAExpandPseudo::expandFunctionCALL( + ? 
MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass) + : LoongArch::R1; + MachineInstrBuilder MIB = +- BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), ScratchReg); +- CALL = BuildMI(MBB, MBBI, DL, TII->get(Opcode)).addReg(ScratchReg); +- if (Func.isSymbol()) { +- const char *FnName = Func.getSymbolName(); +- MIB.addExternalSymbol(FnName, LoongArchII::MO_PCREL_HI); +- CALL.addExternalSymbol(FnName, LoongArchII::MO_PCREL_LO); +- break; +- } +- assert(Func.isGlobal() && "Expected a GlobalValue at this time"); +- const GlobalValue *GV = Func.getGlobal(); +- MIB.addGlobalAddress(GV, 0, LoongArchII::MO_PCREL_HI); +- CALL.addGlobalAddress(GV, 0, LoongArchII::MO_PCREL_LO); ++ BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCADDU18I), ScratchReg); ++ ++ CALL = ++ BuildMI(MBB, MBBI, DL, TII->get(Opcode)).addReg(ScratchReg).addImm(0); ++ ++ if (Func.isSymbol()) ++ MIB.addExternalSymbol(Func.getSymbolName(), LoongArchII::MO_CALL36); ++ else ++ MIB.addDisp(Func, 0, LoongArchII::MO_CALL36); + break; + } + case CodeModel::Large: { +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +index ab1890556814..67de5f7afd78 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +@@ -351,6 +351,10 @@ def simm20_lu32id : SImm20Operand { + let ParserMatchClass = SImmAsmOperand<20, "lu32id">; + } + ++def simm20_pcaddu18i : SImm20Operand { ++ let ParserMatchClass = SImmAsmOperand<20, "pcaddu18i">; ++} ++ + def simm21_lsl2 : Operand { + let ParserMatchClass = SImmAsmOperand<21, "lsl2">; + let EncoderMethod = "getImmOpValueAsr<2>"; +@@ -772,7 +776,7 @@ def LU32I_D : Fmt1RI20<0x16000000, (outs GPR:$dst), + "$rd, $imm20">; + } + def LU52I_D : ALU_2RI12<0x03000000, simm12_lu52id>; +-def PCADDU18I : ALU_1RI20<0x1e000000, simm20>; ++def PCADDU18I : ALU_1RI20<0x1e000000, simm20_pcaddu18i>; + def MUL_D : ALU_3R<0x001d8000>; + def MULH_D : ALU_3R<0x001e0000>; + def MULH_DU : ALU_3R<0x001e8000>; +@@ -1324,7 +1328,7 @@ def : Pat<(brind (add GPR:$rj, simm16_lsl2:$imm16)), + (PseudoBRIND GPR:$rj, simm16_lsl2:$imm16)>; + + let isCall = 1, Defs = [R1] in +-def PseudoCALL : Pseudo<(outs), (ins simm26_symbol:$func)>; ++def PseudoCALL : Pseudo<(outs), (ins bare_symbol:$func)>; + + def : Pat<(loongarch_call tglobaladdr:$func), (PseudoCALL tglobaladdr:$func)>; + def : Pat<(loongarch_call texternalsym:$func), (PseudoCALL texternalsym:$func)>; +@@ -1344,7 +1348,7 @@ def PseudoRET : Pseudo<(outs), (ins), [(loongarch_ret)]>, + PseudoInstExpansion<(JIRL R0, R1, 0)>; + + let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3] in +-def PseudoTAIL : Pseudo<(outs), (ins simm26_symbol:$dst)>; ++def PseudoTAIL : Pseudo<(outs), (ins bare_symbol:$dst)>; + + def : Pat<(loongarch_tail (iPTR tglobaladdr:$dst)), + (PseudoTAIL tglobaladdr:$dst)>; +@@ -1367,6 +1371,19 @@ def PseudoJIRL_TAIL : Pseudo<(outs), (ins GPR:$rj, simm16_lsl2:$imm16)>, + PseudoInstExpansion<(JIRL R0, GPR:$rj, + simm16_lsl2:$imm16)>; + ++/// call36/taill36 macro instructions ++let isCall = 1, isBarrier = 1, isCodeGenOnly = 0, isAsmParserOnly = 1, ++ Defs = [R1], Size = 8, hasSideEffects = 0, mayStore = 0, mayLoad = 0 in ++def PseudoCALL36 : Pseudo<(outs), (ins bare_symbol:$dst), [], ++ "call36", "$dst">, ++ Requires<[IsLA64]>; ++let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3], ++ isCodeGenOnly = 0, isAsmParserOnly = 1, Size = 8, hasSideEffects = 0, ++ mayStore = 0, mayLoad = 0 in ++def PseudoTAIL36 : 
Pseudo<(outs), (ins GPR:$tmp, bare_symbol:$dst), [], ++ "tail36", "$tmp, $dst">, ++ Requires<[IsLA64]>; ++ + /// Load address (la*) macro instructions. + + // Define isCodeGenOnly = 0 to expose them to tablegened assembly parser. +diff --git a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp +index 5daa9481c907..98ad49f25e3f 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp +@@ -95,6 +95,9 @@ static MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym, + case LoongArchII::MO_GD_PC_HI: + Kind = LoongArchMCExpr::VK_LoongArch_TLS_GD_PC_HI20; + break; ++ case LoongArchII::MO_CALL36: ++ Kind = LoongArchMCExpr::VK_LoongArch_CALL36; ++ break; + // TODO: Handle more target-flags. + } + +diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp +index d0a4e9375048..0efc5e6ebb99 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp +@@ -63,11 +63,11 @@ getEffectiveLoongArchCodeModel(const Triple &TT, + + switch (*CM) { + case CodeModel::Small: +- case CodeModel::Medium: + return *CM; ++ case CodeModel::Medium: + case CodeModel::Large: + if (!TT.isArch64Bit()) +- report_fatal_error("Large code model requires LA64"); ++ report_fatal_error("Medium/Large code model requires LA64"); + return *CM; + default: + report_fatal_error( +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h +index cee6dad1f095..0692cb92b694 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h +@@ -47,6 +47,7 @@ enum { + MO_IE_PC64_HI, + MO_LD_PC_HI, + MO_GD_PC_HI, ++ MO_CALL36 + // TODO: Add more flags. + }; + } // end namespace LoongArchII +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp +index e60b9c2cfd97..0a52380dd2cd 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp +@@ -90,6 +90,8 @@ unsigned LoongArchELFObjectWriter::getRelocType(MCContext &Ctx, + return ELF::R_LARCH_TLS_LE64_LO20; + case LoongArch::fixup_loongarch_tls_le64_hi12: + return ELF::R_LARCH_TLS_LE64_HI12; ++ case LoongArch::fixup_loongarch_call36: ++ return ELF::R_LARCH_CALL36; + // TODO: Handle more fixup-kinds. + } + } +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h +index 78414408f21f..0d19d2b0fb1f 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h +@@ -111,6 +111,9 @@ enum Fixups { + fixup_loongarch_relax = FirstLiteralRelocationKind + ELF::R_LARCH_RELAX, + // Generate an R_LARCH_ALIGN which indicates the linker may fixup align here. + fixup_loongarch_align = FirstLiteralRelocationKind + ELF::R_LARCH_ALIGN, ++ // 36-bit fixup corresponding to %call36(foo) for a pair instructions: ++ // pcaddu18i+jirl. 
++ fixup_loongarch_call36 = FirstLiteralRelocationKind + ELF::R_LARCH_CALL36, + }; + } // end namespace LoongArch + } // end namespace llvm +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp +index 09d92ac9aa3a..7c4fe9674d4e 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp +@@ -241,6 +241,9 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO, + case LoongArchMCExpr::VK_LoongArch_TLS_GD_HI20: + FixupKind = LoongArch::fixup_loongarch_tls_gd_hi20; + break; ++ case LoongArchMCExpr::VK_LoongArch_CALL36: ++ FixupKind = LoongArch::fixup_loongarch_call36; ++ break; + } + } else if (Kind == MCExpr::SymbolRef && + cast(Expr)->getKind() == +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp +index 82c992b1cc8c..8ca8876a19b9 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp +@@ -138,6 +138,8 @@ StringRef LoongArchMCExpr::getVariantKindName(VariantKind Kind) { + return "gd_pc_hi20"; + case VK_LoongArch_TLS_GD_HI20: + return "gd_hi20"; ++ case VK_LoongArch_CALL36: ++ return "call36"; + } + } + +@@ -180,6 +182,7 @@ LoongArchMCExpr::getVariantKindForName(StringRef name) { + .Case("ld_hi20", VK_LoongArch_TLS_LD_HI20) + .Case("gd_pc_hi20", VK_LoongArch_TLS_GD_PC_HI20) + .Case("gd_hi20", VK_LoongArch_TLS_GD_HI20) ++ .Case("call36", VK_LoongArch_CALL36) + .Default(VK_LoongArch_Invalid); + } + +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h +index 93251f824103..bd828116d7fa 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h +@@ -61,6 +61,7 @@ public: + VK_LoongArch_TLS_LD_HI20, + VK_LoongArch_TLS_GD_PC_HI20, + VK_LoongArch_TLS_GD_HI20, ++ VK_LoongArch_CALL36, + VK_LoongArch_Invalid // Must be the last item. 
+ }; + +diff --git a/llvm/test/CodeGen/LoongArch/code-models.ll b/llvm/test/CodeGen/LoongArch/code-models.ll +index c610f645a06a..7c6f46d5e926 100644 +--- a/llvm/test/CodeGen/LoongArch/code-models.ll ++++ b/llvm/test/CodeGen/LoongArch/code-models.ll +@@ -23,8 +23,8 @@ define i32 @call_globaladdress(i32 %a) nounwind { + ; MEDIUM: # %bb.0: + ; MEDIUM-NEXT: addi.d $sp, $sp, -16 + ; MEDIUM-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +-; MEDIUM-NEXT: pcalau12i $ra, %pc_hi20(callee) +-; MEDIUM-NEXT: jirl $ra, $ra, %pc_lo12(callee) ++; MEDIUM-NEXT: pcaddu18i $ra, %call36(callee) ++; MEDIUM-NEXT: jirl $ra, $ra, 0 + ; MEDIUM-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload + ; MEDIUM-NEXT: addi.d $sp, $sp, 16 + ; MEDIUM-NEXT: ret +@@ -68,8 +68,8 @@ define void @call_external_sym(ptr %dst) { + ; MEDIUM-NEXT: .cfi_offset 1, -8 + ; MEDIUM-NEXT: ori $a2, $zero, 1000 + ; MEDIUM-NEXT: move $a1, $zero +-; MEDIUM-NEXT: pcalau12i $ra, %pc_hi20(memset) +-; MEDIUM-NEXT: jirl $ra, $ra, %pc_lo12(memset) ++; MEDIUM-NEXT: pcaddu18i $ra, %call36(memset) ++; MEDIUM-NEXT: jirl $ra, $ra, 0 + ; MEDIUM-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload + ; MEDIUM-NEXT: addi.d $sp, $sp, 16 + ; MEDIUM-NEXT: ret +@@ -105,8 +105,8 @@ define i32 @caller_tail(i32 %i) nounwind { + ; + ; MEDIUM-LABEL: caller_tail: + ; MEDIUM: # %bb.0: # %entry +-; MEDIUM-NEXT: pcalau12i $a1, %pc_hi20(callee_tail) +-; MEDIUM-NEXT: jirl $zero, $a1, %pc_lo12(callee_tail) ++; MEDIUM-NEXT: pcaddu18i $a1, %call36(callee_tail) ++; MEDIUM-NEXT: jr $a1 + ; + ; LARGE-LABEL: caller_tail: + ; LARGE: # %bb.0: # %entry +diff --git a/llvm/test/MC/LoongArch/Basic/Integer/invalid64.s b/llvm/test/MC/LoongArch/Basic/Integer/invalid64.s +index acddca9432a6..1c1c658ad440 100644 +--- a/llvm/test/MC/LoongArch/Basic/Integer/invalid64.s ++++ b/llvm/test/MC/LoongArch/Basic/Integer/invalid64.s +@@ -65,7 +65,7 @@ addu16i.d $a0, $a0, 32768 + + ## simm20 + pcaddu18i $a0, 0x80000 +-# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-524288, 524287] ++# CHECK: :[[#@LINE-1]]:16: error: operand must be a symbol with modifier (e.g. 
%call36) or an integer in the range [-524288, 524287] + + ## simm20_lu32id + lu32i.d $a0, 0x80000 +diff --git a/llvm/test/MC/LoongArch/Macros/macros-call.s b/llvm/test/MC/LoongArch/Macros/macros-call.s +new file mode 100644 +index 000000000000..a648a3978038 +--- /dev/null ++++ b/llvm/test/MC/LoongArch/Macros/macros-call.s +@@ -0,0 +1,9 @@ ++# RUN: llvm-mc --triple=loongarch64 %s | FileCheck %s ++ ++call36 sym_call ++# CHECK: pcaddu18i $ra, %call36(sym_call) ++# CHECK-NEXT: jirl $ra, $ra, 0 ++ ++tail36 $t0, sym_tail ++# CHECK: pcaddu18i $t0, %call36(sym_tail) ++# CHECK-NEXT: jr $t0 +diff --git a/llvm/test/MC/LoongArch/Relocations/relocations.s b/llvm/test/MC/LoongArch/Relocations/relocations.s +index 042cc93470a1..bec71e103893 100644 +--- a/llvm/test/MC/LoongArch/Relocations/relocations.s ++++ b/llvm/test/MC/LoongArch/Relocations/relocations.s +@@ -218,3 +218,8 @@ lu12i.w $t1, %gd_hi20(foo) + # RELOC: R_LARCH_TLS_GD_HI20 foo 0x0 + # INSTR: lu12i.w $t1, %gd_hi20(foo) + # FIXUP: fixup A - offset: 0, value: %gd_hi20(foo), kind: FK_NONE ++ ++pcaddu18i $t1, %call36(foo) ++# RELOC: R_LARCH_CALL36 foo 0x0 ++# INSTR: pcaddu18i $t1, %call36(foo) ++# FIXUP: fixup A - offset: 0, value: %call36(foo), kind: FK_NONE +-- +2.20.1 + diff --git a/0006-LoongArch-Override-LoongArchTargetLowering-getExtend.patch b/0006-LoongArch-Override-LoongArchTargetLowering-getExtend.patch new file mode 100644 index 0000000..2d829a2 --- /dev/null +++ b/0006-LoongArch-Override-LoongArchTargetLowering-getExtend.patch @@ -0,0 +1,1142 @@ +From 120922e20f54392ccb9e60050e8c2531e284b8aa Mon Sep 17 00:00:00 2001 +From: Lu Weining +Date: Mon, 4 Mar 2024 08:38:52 +0800 +Subject: [PATCH 6/7] [LoongArch] Override + LoongArchTargetLowering::getExtendForAtomicCmpSwapArg (#83656) + +This patch aims to solve Firefox issue: +https://bugzilla.mozilla.org/show_bug.cgi?id=1882301 + +Similar to 616289ed2922. Currently LoongArch uses an ll.[wd]/sc.[wd] +loop for ATOMIC_CMP_XCHG. Because the comparison in the loop is +full-width (i.e. the `bne` instruction), we must sign extend the input +comparsion argument. + +Note that LoongArch ISA manual V1.1 has introduced compare-and-swap +instructions. We would change the implementation (return `ANY_EXTEND`) +when we support them. + +(cherry picked from commit 5f058aa211995d2f0df2a0e063532832569cb7a8) +(cherry picked from commit ea6c457b8dd2d0e6a7f05b4a5bdd2686085e1ec0) + +--- + .../LoongArch/LoongArchISelLowering.cpp | 5 + + .../Target/LoongArch/LoongArchISelLowering.h | 2 + + .../LoongArch/atomicrmw-uinc-udec-wrap.ll | 120 +++++++------ + .../ir-instruction/atomic-cmpxchg.ll | 25 +-- + .../LoongArch/ir-instruction/atomicrmw-fp.ll | 160 +++++++++--------- + 5 files changed, 159 insertions(+), 153 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index 99328f09921f..4fc2b4709840 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -4893,3 +4893,8 @@ bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const { + + return !isa(Y); + } ++ ++ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const { ++ // TODO: LAMCAS will use amcas{_DB,}.[bhwd] which does not require extension. 
++ return ISD::SIGN_EXTEND; ++} +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +index 23b90640a690..2c9826a13237 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +@@ -203,6 +203,8 @@ public: + return ISD::SIGN_EXTEND; + } + ++ ISD::NodeType getExtendForAtomicCmpSwapArg() const override; ++ + Register getRegisterByName(const char *RegName, LLT VT, + const MachineFunction &MF) const override; + bool mayBeEmittedAsTailCall(const CallInst *CI) const override; +diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll +index d8908acbc945..f0baf19bcf0e 100644 +--- a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll ++++ b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll +@@ -26,15 +26,16 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { + ; LA64-NEXT: andi $a5, $a5, 255 + ; LA64-NEXT: sll.w $a5, $a5, $a0 + ; LA64-NEXT: and $a6, $a3, $a4 +-; LA64-NEXT: or $a6, $a6, $a5 ++; LA64-NEXT: or $a5, $a6, $a5 ++; LA64-NEXT: addi.w $a6, $a3, 0 + ; LA64-NEXT: .LBB0_3: # %atomicrmw.start + ; LA64-NEXT: # Parent Loop BB0_1 Depth=1 + ; LA64-NEXT: # => This Inner Loop Header: Depth=2 +-; LA64-NEXT: ll.w $a5, $a2, 0 +-; LA64-NEXT: bne $a5, $a3, .LBB0_5 ++; LA64-NEXT: ll.w $a3, $a2, 0 ++; LA64-NEXT: bne $a3, $a6, .LBB0_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB0_3 Depth=2 +-; LA64-NEXT: move $a7, $a6 ++; LA64-NEXT: move $a7, $a5 + ; LA64-NEXT: sc.w $a7, $a2, 0 + ; LA64-NEXT: beqz $a7, .LBB0_3 + ; LA64-NEXT: b .LBB0_6 +@@ -43,11 +44,9 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { + ; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB0_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1 +-; LA64-NEXT: addi.w $a6, $a3, 0 +-; LA64-NEXT: move $a3, $a5 +-; LA64-NEXT: bne $a5, $a6, .LBB0_1 ++; LA64-NEXT: bne $a3, $a6, .LBB0_1 + ; LA64-NEXT: # %bb.2: # %atomicrmw.end +-; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: srl.w $a0, $a3, $a0 + ; LA64-NEXT: ret + %result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst + ret i8 %result +@@ -79,15 +78,16 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { + ; LA64-NEXT: bstrpick.d $a5, $a5, 15, 0 + ; LA64-NEXT: sll.w $a5, $a5, $a0 + ; LA64-NEXT: and $a6, $a3, $a4 +-; LA64-NEXT: or $a6, $a6, $a5 ++; LA64-NEXT: or $a5, $a6, $a5 ++; LA64-NEXT: addi.w $a6, $a3, 0 + ; LA64-NEXT: .LBB1_3: # %atomicrmw.start + ; LA64-NEXT: # Parent Loop BB1_1 Depth=1 + ; LA64-NEXT: # => This Inner Loop Header: Depth=2 +-; LA64-NEXT: ll.w $a5, $a2, 0 +-; LA64-NEXT: bne $a5, $a3, .LBB1_5 ++; LA64-NEXT: ll.w $a3, $a2, 0 ++; LA64-NEXT: bne $a3, $a6, .LBB1_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB1_3 Depth=2 +-; LA64-NEXT: move $a7, $a6 ++; LA64-NEXT: move $a7, $a5 + ; LA64-NEXT: sc.w $a7, $a2, 0 + ; LA64-NEXT: beqz $a7, .LBB1_3 + ; LA64-NEXT: b .LBB1_6 +@@ -96,11 +96,9 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { + ; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB1_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1 +-; LA64-NEXT: addi.w $a6, $a3, 0 +-; LA64-NEXT: move $a3, $a5 +-; LA64-NEXT: bne $a5, $a6, .LBB1_1 ++; LA64-NEXT: bne $a3, $a6, .LBB1_1 + ; LA64-NEXT: # %bb.2: # %atomicrmw.end +-; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: srl.w $a0, $a3, $a0 + ; LA64-NEXT: ret + %result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst + ret i16 %result 
+@@ -109,37 +107,36 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { + define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { + ; LA64-LABEL: atomicrmw_uinc_wrap_i32: + ; LA64: # %bb.0: +-; LA64-NEXT: ld.w $a3, $a0, 0 +-; LA64-NEXT: addi.w $a2, $a1, 0 ++; LA64-NEXT: ld.w $a2, $a0, 0 ++; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .p2align 4, , 16 + ; LA64-NEXT: .LBB2_1: # %atomicrmw.start + ; LA64-NEXT: # =>This Loop Header: Depth=1 + ; LA64-NEXT: # Child Loop BB2_3 Depth 2 +-; LA64-NEXT: addi.w $a4, $a3, 0 +-; LA64-NEXT: sltu $a1, $a4, $a2 +-; LA64-NEXT: xori $a1, $a1, 1 +-; LA64-NEXT: addi.d $a5, $a3, 1 +-; LA64-NEXT: masknez $a5, $a5, $a1 ++; LA64-NEXT: addi.w $a3, $a2, 0 ++; LA64-NEXT: sltu $a4, $a3, $a1 ++; LA64-NEXT: xori $a4, $a4, 1 ++; LA64-NEXT: addi.d $a2, $a2, 1 ++; LA64-NEXT: masknez $a4, $a2, $a4 + ; LA64-NEXT: .LBB2_3: # %atomicrmw.start + ; LA64-NEXT: # Parent Loop BB2_1 Depth=1 + ; LA64-NEXT: # => This Inner Loop Header: Depth=2 +-; LA64-NEXT: ll.w $a1, $a0, 0 +-; LA64-NEXT: bne $a1, $a3, .LBB2_5 ++; LA64-NEXT: ll.w $a2, $a0, 0 ++; LA64-NEXT: bne $a2, $a3, .LBB2_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB2_3 Depth=2 +-; LA64-NEXT: move $a6, $a5 +-; LA64-NEXT: sc.w $a6, $a0, 0 +-; LA64-NEXT: beqz $a6, .LBB2_3 ++; LA64-NEXT: move $a5, $a4 ++; LA64-NEXT: sc.w $a5, $a0, 0 ++; LA64-NEXT: beqz $a5, .LBB2_3 + ; LA64-NEXT: b .LBB2_6 + ; LA64-NEXT: .LBB2_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1 + ; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB2_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1 +-; LA64-NEXT: move $a3, $a1 +-; LA64-NEXT: bne $a1, $a4, .LBB2_1 ++; LA64-NEXT: bne $a2, $a3, .LBB2_1 + ; LA64-NEXT: # %bb.2: # %atomicrmw.end +-; LA64-NEXT: move $a0, $a1 ++; LA64-NEXT: move $a0, $a2 + ; LA64-NEXT: ret + %result = atomicrmw uinc_wrap ptr %ptr, i32 %val seq_cst + ret i32 %result +@@ -212,15 +209,16 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { + ; LA64-NEXT: andi $a6, $a6, 255 + ; LA64-NEXT: sll.w $a6, $a6, $a0 + ; LA64-NEXT: and $a7, $a3, $a4 +-; LA64-NEXT: or $a7, $a7, $a6 ++; LA64-NEXT: or $a6, $a7, $a6 ++; LA64-NEXT: addi.w $a7, $a3, 0 + ; LA64-NEXT: .LBB4_3: # %atomicrmw.start + ; LA64-NEXT: # Parent Loop BB4_1 Depth=1 + ; LA64-NEXT: # => This Inner Loop Header: Depth=2 +-; LA64-NEXT: ll.w $a6, $a2, 0 +-; LA64-NEXT: bne $a6, $a3, .LBB4_5 ++; LA64-NEXT: ll.w $a3, $a2, 0 ++; LA64-NEXT: bne $a3, $a7, .LBB4_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB4_3 Depth=2 +-; LA64-NEXT: move $t0, $a7 ++; LA64-NEXT: move $t0, $a6 + ; LA64-NEXT: sc.w $t0, $a2, 0 + ; LA64-NEXT: beqz $t0, .LBB4_3 + ; LA64-NEXT: b .LBB4_6 +@@ -229,11 +227,9 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { + ; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB4_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1 +-; LA64-NEXT: addi.w $a7, $a3, 0 +-; LA64-NEXT: move $a3, $a6 +-; LA64-NEXT: bne $a6, $a7, .LBB4_1 ++; LA64-NEXT: bne $a3, $a7, .LBB4_1 + ; LA64-NEXT: # %bb.2: # %atomicrmw.end +-; LA64-NEXT: srl.w $a0, $a6, $a0 ++; LA64-NEXT: srl.w $a0, $a3, $a0 + ; LA64-NEXT: ret + %result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst + ret i8 %result +@@ -270,15 +266,16 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { + ; LA64-NEXT: bstrpick.d $a6, $a6, 15, 0 + ; LA64-NEXT: sll.w $a6, $a6, $a0 + ; LA64-NEXT: and $a7, $a3, $a4 +-; LA64-NEXT: or $a7, $a7, $a6 ++; LA64-NEXT: or $a6, $a7, $a6 ++; LA64-NEXT: addi.w $a7, $a3, 0 + ; LA64-NEXT: .LBB5_3: 
# %atomicrmw.start + ; LA64-NEXT: # Parent Loop BB5_1 Depth=1 + ; LA64-NEXT: # => This Inner Loop Header: Depth=2 +-; LA64-NEXT: ll.w $a6, $a2, 0 +-; LA64-NEXT: bne $a6, $a3, .LBB5_5 ++; LA64-NEXT: ll.w $a3, $a2, 0 ++; LA64-NEXT: bne $a3, $a7, .LBB5_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB5_3 Depth=2 +-; LA64-NEXT: move $t0, $a7 ++; LA64-NEXT: move $t0, $a6 + ; LA64-NEXT: sc.w $t0, $a2, 0 + ; LA64-NEXT: beqz $t0, .LBB5_3 + ; LA64-NEXT: b .LBB5_6 +@@ -287,11 +284,9 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { + ; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB5_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1 +-; LA64-NEXT: addi.w $a7, $a3, 0 +-; LA64-NEXT: move $a3, $a6 +-; LA64-NEXT: bne $a6, $a7, .LBB5_1 ++; LA64-NEXT: bne $a3, $a7, .LBB5_1 + ; LA64-NEXT: # %bb.2: # %atomicrmw.end +-; LA64-NEXT: srl.w $a0, $a6, $a0 ++; LA64-NEXT: srl.w $a0, $a3, $a0 + ; LA64-NEXT: ret + %result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst + ret i16 %result +@@ -300,22 +295,22 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { + define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { + ; LA64-LABEL: atomicrmw_udec_wrap_i32: + ; LA64: # %bb.0: +-; LA64-NEXT: ld.w $a4, $a0, 0 ++; LA64-NEXT: ld.w $a2, $a0, 0 + ; LA64-NEXT: addi.w $a3, $a1, 0 + ; LA64-NEXT: .p2align 4, , 16 + ; LA64-NEXT: .LBB6_1: # %atomicrmw.start + ; LA64-NEXT: # =>This Loop Header: Depth=1 + ; LA64-NEXT: # Child Loop BB6_3 Depth 2 +-; LA64-NEXT: addi.w $a5, $a4, 0 +-; LA64-NEXT: sltu $a2, $a3, $a5 +-; LA64-NEXT: addi.d $a6, $a4, -1 +-; LA64-NEXT: masknez $a6, $a6, $a2 +-; LA64-NEXT: maskeqz $a2, $a1, $a2 +-; LA64-NEXT: or $a2, $a2, $a6 +-; LA64-NEXT: sltui $a6, $a5, 1 +-; LA64-NEXT: masknez $a2, $a2, $a6 +-; LA64-NEXT: maskeqz $a6, $a1, $a6 +-; LA64-NEXT: or $a6, $a6, $a2 ++; LA64-NEXT: addi.w $a4, $a2, 0 ++; LA64-NEXT: sltu $a5, $a3, $a4 ++; LA64-NEXT: addi.d $a2, $a2, -1 ++; LA64-NEXT: masknez $a2, $a2, $a5 ++; LA64-NEXT: maskeqz $a5, $a1, $a5 ++; LA64-NEXT: or $a2, $a5, $a2 ++; LA64-NEXT: sltui $a5, $a4, 1 ++; LA64-NEXT: masknez $a2, $a2, $a5 ++; LA64-NEXT: maskeqz $a5, $a1, $a5 ++; LA64-NEXT: or $a5, $a5, $a2 + ; LA64-NEXT: .LBB6_3: # %atomicrmw.start + ; LA64-NEXT: # Parent Loop BB6_1 Depth=1 + ; LA64-NEXT: # => This Inner Loop Header: Depth=2 +@@ -323,17 +318,16 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { + ; LA64-NEXT: bne $a2, $a4, .LBB6_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB6_3 Depth=2 +-; LA64-NEXT: move $a7, $a6 +-; LA64-NEXT: sc.w $a7, $a0, 0 +-; LA64-NEXT: beqz $a7, .LBB6_3 ++; LA64-NEXT: move $a6, $a5 ++; LA64-NEXT: sc.w $a6, $a0, 0 ++; LA64-NEXT: beqz $a6, .LBB6_3 + ; LA64-NEXT: b .LBB6_6 + ; LA64-NEXT: .LBB6_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 + ; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB6_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 +-; LA64-NEXT: move $a4, $a2 +-; LA64-NEXT: bne $a2, $a5, .LBB6_1 ++; LA64-NEXT: bne $a2, $a4, .LBB6_1 + ; LA64-NEXT: # %bb.2: # %atomicrmw.end + ; LA64-NEXT: move $a0, $a2 + ; LA64-NEXT: ret +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +index 1dd3f39852d8..ebb09640e6c9 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +@@ -71,6 +71,7 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind + 
define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_acquire_acquire: + ; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a3, $a0, 0 + ; LA64-NEXT: bne $a3, $a1, .LBB2_3 +@@ -176,6 +177,7 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin + define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_acquire_monotonic: + ; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a3, $a0, 0 + ; LA64-NEXT: bne $a3, $a1, .LBB6_3 +@@ -285,9 +287,10 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou + define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti32: + ; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a3, $a1, 0 + ; LA64-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: ll.w $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB10_3 ++; LA64-NEXT: ll.w $a1, $a0, 0 ++; LA64-NEXT: bne $a1, $a3, .LBB10_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.w $a4, $a0, 0 +@@ -296,7 +299,7 @@ define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nou + ; LA64-NEXT: .LBB10_3: + ; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB10_4: +-; LA64-NEXT: move $a0, $a3 ++; LA64-NEXT: move $a0, $a1 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire + %res = extractvalue { i32, i1 } %tmp, 0 +@@ -404,6 +407,7 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw + define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti1: + ; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a3, $a0, 0 + ; LA64-NEXT: bne $a3, $a1, .LBB14_3 +@@ -415,8 +419,7 @@ define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounw + ; LA64-NEXT: .LBB14_3: + ; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB14_4: +-; LA64-NEXT: addi.w $a0, $a1, 0 +-; LA64-NEXT: xor $a0, $a3, $a0 ++; LA64-NEXT: xor $a0, $a3, $a1 + ; LA64-NEXT: sltui $a0, $a0, 1 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire +@@ -516,6 +519,7 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw + define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic: + ; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a3, $a0, 0 + ; LA64-NEXT: bne $a3, $a1, .LBB18_3 +@@ -625,9 +629,10 @@ define i16 @cmpxchg_i16_monotonic_monotonic_reti16(ptr %ptr, i16 %cmp, i16 %val) + define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti32: + ; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a3, $a1, 0 + ; LA64-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: ll.w $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB22_3 ++; LA64-NEXT: ll.w $a1, $a0, 0 ++; LA64-NEXT: bne $a1, $a3, .LBB22_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.w $a4, 
$a0, 0 +@@ -636,7 +641,7 @@ define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val) + ; LA64-NEXT: .LBB22_3: + ; LA64-NEXT: dbar 1792 + ; LA64-NEXT: .LBB22_4: +-; LA64-NEXT: move $a0, $a3 ++; LA64-NEXT: move $a0, $a1 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic + %res = extractvalue { i32, i1 } %tmp, 0 +@@ -744,6 +749,7 @@ define i1 @cmpxchg_i16_monotonic_monotonic_reti1(ptr %ptr, i16 %cmp, i16 %val) n + define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti1: + ; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a3, $a0, 0 + ; LA64-NEXT: bne $a3, $a1, .LBB26_3 +@@ -755,8 +761,7 @@ define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) n + ; LA64-NEXT: .LBB26_3: + ; LA64-NEXT: dbar 1792 + ; LA64-NEXT: .LBB26_4: +-; LA64-NEXT: addi.w $a0, $a1, 0 +-; LA64-NEXT: xor $a0, $a3, $a0 ++; LA64-NEXT: xor $a0, $a3, $a1 + ; LA64-NEXT: sltui $a0, $a0, 1 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll +index 589360823b14..4d8160d70803 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll +@@ -16,6 +16,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { + ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB0_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB0_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -33,8 +34,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { + ; LA64F-NEXT: .LBB0_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB0_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB0_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -51,6 +51,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { + ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB0_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB0_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -68,8 +69,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { + ; LA64D-NEXT: .LBB0_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB0_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB0_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fadd ptr %p, float 1.0 acquire, align 4 +@@ -90,6 +90,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { + ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB1_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB1_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -107,8 +108,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { + ; LA64F-NEXT: .LBB1_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB1_1 
Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB1_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB1_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -125,6 +125,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { + ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB1_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB1_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -142,8 +143,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { + ; LA64D-NEXT: .LBB1_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB1_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB1_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fsub ptr %p, float 1.0 acquire, align 4 +@@ -165,6 +165,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { + ; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB2_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB2_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -182,8 +183,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { + ; LA64F-NEXT: .LBB2_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB2_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB2_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB2_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -201,6 +201,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { + ; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB2_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB2_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -218,8 +219,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { + ; LA64D-NEXT: .LBB2_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB2_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB2_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB2_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fmin ptr %p, float 1.0 acquire, align 4 +@@ -241,6 +241,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { + ; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB3_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB3_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -258,8 +259,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { + ; LA64F-NEXT: .LBB3_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB3_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB3_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB3_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -277,6 +277,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { + ; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB3_3: # 
%atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB3_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -294,8 +295,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { + ; LA64D-NEXT: .LBB3_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB3_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB3_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB3_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fmax ptr %p, float 1.0 acquire, align 4 +@@ -694,6 +694,7 @@ define float @float_fadd_release(ptr %p) nounwind { + ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB8_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB8_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -711,8 +712,7 @@ define float @float_fadd_release(ptr %p) nounwind { + ; LA64F-NEXT: .LBB8_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB8_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB8_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB8_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -729,6 +729,7 @@ define float @float_fadd_release(ptr %p) nounwind { + ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB8_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB8_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -746,8 +747,7 @@ define float @float_fadd_release(ptr %p) nounwind { + ; LA64D-NEXT: .LBB8_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB8_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB8_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB8_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fadd ptr %p, float 1.0 release, align 4 +@@ -768,6 +768,7 @@ define float @float_fsub_release(ptr %p) nounwind { + ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB9_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB9_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -785,8 +786,7 @@ define float @float_fsub_release(ptr %p) nounwind { + ; LA64F-NEXT: .LBB9_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB9_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB9_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB9_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -803,6 +803,7 @@ define float @float_fsub_release(ptr %p) nounwind { + ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB9_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB9_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -820,8 +821,7 @@ define float @float_fsub_release(ptr %p) nounwind { + ; LA64D-NEXT: .LBB9_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB9_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB9_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB9_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; 
LA64D-NEXT: ret + %v = atomicrmw fsub ptr %p, float 1.0 release, align 4 +@@ -843,6 +843,7 @@ define float @float_fmin_release(ptr %p) nounwind { + ; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB10_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB10_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -860,8 +861,7 @@ define float @float_fmin_release(ptr %p) nounwind { + ; LA64F-NEXT: .LBB10_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB10_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB10_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB10_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -879,6 +879,7 @@ define float @float_fmin_release(ptr %p) nounwind { + ; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB10_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB10_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -896,8 +897,7 @@ define float @float_fmin_release(ptr %p) nounwind { + ; LA64D-NEXT: .LBB10_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB10_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB10_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB10_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fmin ptr %p, float 1.0 release, align 4 +@@ -919,6 +919,7 @@ define float @float_fmax_release(ptr %p) nounwind { + ; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB11_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB11_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -936,8 +937,7 @@ define float @float_fmax_release(ptr %p) nounwind { + ; LA64F-NEXT: .LBB11_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB11_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB11_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB11_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -955,6 +955,7 @@ define float @float_fmax_release(ptr %p) nounwind { + ; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB11_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB11_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -972,8 +973,7 @@ define float @float_fmax_release(ptr %p) nounwind { + ; LA64D-NEXT: .LBB11_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB11_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB11_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB11_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fmax ptr %p, float 1.0 release, align 4 +@@ -1372,6 +1372,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB16_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB16_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -1389,8 
+1390,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: .LBB16_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB16_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB16_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB16_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -1407,6 +1407,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { + ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB16_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB16_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -1424,8 +1425,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { + ; LA64D-NEXT: .LBB16_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB16_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB16_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB16_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fadd ptr %p, float 1.0 acq_rel, align 4 +@@ -1446,6 +1446,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB17_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB17_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -1463,8 +1464,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: .LBB17_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB17_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB17_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB17_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -1481,6 +1481,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { + ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB17_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB17_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -1498,8 +1499,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { + ; LA64D-NEXT: .LBB17_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB17_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB17_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB17_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fsub ptr %p, float 1.0 acq_rel, align 4 +@@ -1521,6 +1521,7 @@ define float @float_fmin_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB18_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB18_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -1538,8 +1539,7 @@ define float @float_fmin_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: .LBB18_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB18_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB18_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB18_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -1557,6 +1557,7 @@ define float @float_fmin_acq_rel(ptr %p) 
nounwind { + ; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB18_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB18_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -1574,8 +1575,7 @@ define float @float_fmin_acq_rel(ptr %p) nounwind { + ; LA64D-NEXT: .LBB18_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB18_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB18_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB18_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fmin ptr %p, float 1.0 acq_rel, align 4 +@@ -1597,6 +1597,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB19_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB19_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -1614,8 +1615,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: .LBB19_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB19_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB19_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB19_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -1633,6 +1633,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { + ; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB19_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB19_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -1650,8 +1651,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { + ; LA64D-NEXT: .LBB19_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB19_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB19_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB19_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fmax ptr %p, float 1.0 acq_rel, align 4 +@@ -2074,6 +2074,7 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB24_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB24_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2091,8 +2092,7 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: .LBB24_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB24_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB24_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB24_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -2109,6 +2109,7 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB24_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB24_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2126,8 +2127,7 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: .LBB24_6: # %atomicrmw.start + ; LA64D-NEXT: # 
in Loop: Header=BB24_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB24_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB24_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fadd ptr %p, float 1.0 seq_cst, align 4 +@@ -2148,6 +2148,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB25_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB25_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2165,8 +2166,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: .LBB25_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB25_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB25_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB25_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -2183,6 +2183,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB25_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB25_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2200,8 +2201,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: .LBB25_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB25_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB25_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB25_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fsub ptr %p, float 1.0 seq_cst, align 4 +@@ -2223,6 +2223,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB26_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB26_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2240,8 +2241,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: .LBB26_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB26_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB26_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB26_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -2259,6 +2259,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB26_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB26_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2276,8 +2277,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: .LBB26_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB26_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB26_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB26_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fmin ptr %p, float 1.0 seq_cst, align 4 +@@ -2299,6 +2299,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 + ; LA64F-NEXT: 
movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB27_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB27_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2316,8 +2317,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: .LBB27_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB27_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB27_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB27_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -2335,6 +2335,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB27_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB27_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2352,8 +2353,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: .LBB27_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB27_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB27_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB27_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fmax ptr %p, float 1.0 seq_cst, align 4 +@@ -2752,6 +2752,7 @@ define float @float_fadd_monotonic(ptr %p) nounwind { + ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB32_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB32_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2769,8 +2770,7 @@ define float @float_fadd_monotonic(ptr %p) nounwind { + ; LA64F-NEXT: .LBB32_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB32_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB32_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB32_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -2787,6 +2787,7 @@ define float @float_fadd_monotonic(ptr %p) nounwind { + ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB32_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB32_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2804,8 +2805,7 @@ define float @float_fadd_monotonic(ptr %p) nounwind { + ; LA64D-NEXT: .LBB32_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB32_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB32_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB32_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fadd ptr %p, float 1.0 monotonic, align 4 +@@ -2826,6 +2826,7 @@ define float @float_fsub_monotonic(ptr %p) nounwind { + ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB33_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB33_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2843,8 +2844,7 @@ define float @float_fsub_monotonic(ptr %p) nounwind { + ; LA64F-NEXT: .LBB33_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB33_1 Depth=1 + ; LA64F-NEXT: 
movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB33_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB33_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -2861,6 +2861,7 @@ define float @float_fsub_monotonic(ptr %p) nounwind { + ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB33_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB33_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2878,8 +2879,7 @@ define float @float_fsub_monotonic(ptr %p) nounwind { + ; LA64D-NEXT: .LBB33_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB33_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB33_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB33_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fsub ptr %p, float 1.0 monotonic, align 4 +@@ -2901,6 +2901,7 @@ define float @float_fmin_monotonic(ptr %p) nounwind { + ; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB34_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB34_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2918,8 +2919,7 @@ define float @float_fmin_monotonic(ptr %p) nounwind { + ; LA64F-NEXT: .LBB34_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB34_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB34_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB34_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -2937,6 +2937,7 @@ define float @float_fmin_monotonic(ptr %p) nounwind { + ; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB34_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB34_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2954,8 +2955,7 @@ define float @float_fmin_monotonic(ptr %p) nounwind { + ; LA64D-NEXT: .LBB34_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB34_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB34_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB34_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fmin ptr %p, float 1.0 monotonic, align 4 +@@ -2977,6 +2977,7 @@ define float @float_fmax_monotonic(ptr %p) nounwind { + ; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB35_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB35_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2994,8 +2995,7 @@ define float @float_fmax_monotonic(ptr %p) nounwind { + ; LA64F-NEXT: .LBB35_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB35_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB35_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB35_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -3013,6 +3013,7 @@ define float @float_fmax_monotonic(ptr %p) nounwind { + ; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, 
$a2, 0 + ; LA64D-NEXT: .LBB35_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB35_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -3030,8 +3031,7 @@ define float @float_fmax_monotonic(ptr %p) nounwind { + ; LA64D-NEXT: .LBB35_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB35_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB35_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB35_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fmax ptr %p, float 1.0 monotonic, align 4 +-- +2.20.1 + diff --git a/0005-Backport-LoongArch-RISCV-Support-R_LARCH_-ADD-SUB-_ULEB128-R_RISCV_-SET-SUB-_ULEB128-for-uleb128-directives.patch b/0006-LoongArch-RISCV-Support-R_LARCH_-ADD-SUB-_ULEB128-R_.patch similarity index 99% rename from 0005-Backport-LoongArch-RISCV-Support-R_LARCH_-ADD-SUB-_ULEB128-R_RISCV_-SET-SUB-_ULEB128-for-uleb128-directives.patch rename to 0006-LoongArch-RISCV-Support-R_LARCH_-ADD-SUB-_ULEB128-R_.patch index c1c4f9f..786178a 100644 --- a/0005-Backport-LoongArch-RISCV-Support-R_LARCH_-ADD-SUB-_ULEB128-R_RISCV_-SET-SUB-_ULEB128-for-uleb128-directives.patch +++ b/0006-LoongArch-RISCV-Support-R_LARCH_-ADD-SUB-_ULEB128-R_.patch @@ -13,7 +13,6 @@ function. And at the same time, we ensure that this patch can't introduce new de (such as the generation for uleb128 reloc) on RISCV in this version. 3, Fix invalid-sleb.s in original commit d7398a35. -Change-Id: Ie687b7d8483c76cf647141162641db1a9d819a04 --- .../llvm/BinaryFormat/ELFRelocs/RISCV.def | 2 + llvm/include/llvm/MC/MCAsmBackend.h | 8 +++ diff --git a/0006-tsan-Add-support-for-linux-loongarch64-in-lib-tsan-g.patch b/0006-tsan-Add-support-for-linux-loongarch64-in-lib-tsan-g.patch new file mode 100644 index 0000000..1c8d479 --- /dev/null +++ b/0006-tsan-Add-support-for-linux-loongarch64-in-lib-tsan-g.patch @@ -0,0 +1,106 @@ +From 4021d30c75ac6af5e57bf0b77ef1d8e8dea00160 Mon Sep 17 00:00:00 2001 +From: abner chenc +Date: Thu, 28 Dec 2023 18:54:35 +0800 +Subject: [PATCH 06/27] [tsan] Add support for linux/loongarch64 in + lib/tsan/go/buildgo.sh (#72819) + +Co-authored-by: Xiaolin Zhao +(cherry picked from commit 9d3fbf97bef3f19da4e0a047f017b8142f59b3fd) +--- + compiler-rt/lib/tsan/go/buildgo.sh | 2 ++ + compiler-rt/lib/tsan/rtl/tsan_platform.h | 32 +++++++++++++++++++ + .../lib/tsan/rtl/tsan_platform_linux.cpp | 8 ++++- + 3 files changed, 41 insertions(+), 1 deletion(-) + +diff --git a/compiler-rt/lib/tsan/go/buildgo.sh b/compiler-rt/lib/tsan/go/buildgo.sh +index 0bd59368cc46..78ba41a0bdc6 100755 +--- a/compiler-rt/lib/tsan/go/buildgo.sh ++++ b/compiler-rt/lib/tsan/go/buildgo.sh +@@ -10,6 +10,8 @@ if [ "`uname -a | grep Linux`" != "" ]; then + HOST_GOARCH="amd64" + elif [ "`uname -a | grep aarch64`" != "" ]; then + HOST_GOARCH="arm64" ++ elif [ "`uname -a | grep loongarch64`" != "" ]; then ++ HOST_GOARCH="loong64" + elif [ "`uname -a | grep -i mips64`" != "" ]; then + if [ "`lscpu | grep -i Little`" != "" ]; then + HOST_GOARCH="mips64le" +diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform.h b/compiler-rt/lib/tsan/rtl/tsan_platform.h +index f0cdaf48eaa3..48dd56d15751 100644 +--- a/compiler-rt/lib/tsan/rtl/tsan_platform.h ++++ b/compiler-rt/lib/tsan/rtl/tsan_platform.h +@@ -558,6 +558,35 @@ struct MappingGoAarch64 { + static const uptr kShadowAdd = 0x200000000000ull; + }; + ++/* Go on linux/loongarch64 (47-bit VMA) ++0000 0000 1000 - 0000 1000 0000: executable ++0000 1000 0000 - 00c0 0000 0000: - ++00c0 0000 0000 - 00e0 0000 0000: heap 
++00e0 0000 0000 - 2000 0000 0000: - ++2000 0000 0000 - 2800 0000 0000: shadow ++2800 0000 0000 - 3000 0000 0000: - ++3000 0000 0000 - 3200 0000 0000: metainfo (memory blocks and sync objects) ++3200 0000 0000 - 8000 0000 0000: - ++*/ ++struct MappingGoLoongArch64_47 { ++ static const uptr kMetaShadowBeg = 0x300000000000ull; ++ static const uptr kMetaShadowEnd = 0x320000000000ull; ++ static const uptr kShadowBeg = 0x200000000000ull; ++ static const uptr kShadowEnd = 0x280000000000ull; ++ static const uptr kLoAppMemBeg = 0x000000001000ull; ++ static const uptr kLoAppMemEnd = 0x00e000000000ull; ++ static const uptr kMidAppMemBeg = 0; ++ static const uptr kMidAppMemEnd = 0; ++ static const uptr kHiAppMemBeg = 0; ++ static const uptr kHiAppMemEnd = 0; ++ static const uptr kHeapMemBeg = 0; ++ static const uptr kHeapMemEnd = 0; ++ static const uptr kVdsoBeg = 0; ++ static const uptr kShadowMsk = 0; ++ static const uptr kShadowXor = 0; ++ static const uptr kShadowAdd = 0x200000000000ull; ++}; ++ + /* + Go on linux/mips64 (47-bit VMA) + 0000 0000 1000 - 0000 1000 0000: executable +@@ -633,6 +662,8 @@ ALWAYS_INLINE auto SelectMapping(Arg arg) { + return Func::template Apply(arg); + # elif defined(__aarch64__) + return Func::template Apply(arg); ++# elif defined(__loongarch_lp64) ++ return Func::template Apply(arg); + # elif SANITIZER_WINDOWS + return Func::template Apply(arg); + # else +@@ -692,6 +723,7 @@ void ForEachMapping() { + Func::template Apply(); + Func::template Apply(); + Func::template Apply(); ++ Func::template Apply(); + Func::template Apply(); + Func::template Apply(); + } +diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp +index 384a443c16b0..3f4a3760794f 100644 +--- a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp ++++ b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp +@@ -238,7 +238,13 @@ void InitializePlatformEarly() { + Printf("FATAL: Found %zd - Supported 47\n", vmaSize); + Die(); + } +-# endif ++# else ++ if (vmaSize != 47) { ++ Printf("FATAL: ThreadSanitizer: unsupported VMA range\n"); ++ Printf("FATAL: Found %zd - Supported 47\n", vmaSize); ++ Die(); ++ } ++# endif + #elif defined(__powerpc64__) + # if !SANITIZER_GO + if (vmaSize != 44 && vmaSize != 46 && vmaSize != 47) { +-- +2.20.1 + diff --git a/0006-Backport-LoongArch-Add-relaxDwarfLineAddr-and-relaxDwarfCFA-to-handle-the-mutable-label-diff-in-dwarfinfo.patch b/0007-LoongArch-Add-relaxDwarfLineAddr-and-relaxDwarfCFA-t.patch similarity index 99% rename from 0006-Backport-LoongArch-Add-relaxDwarfLineAddr-and-relaxDwarfCFA-to-handle-the-mutable-label-diff-in-dwarfinfo.patch rename to 0007-LoongArch-Add-relaxDwarfLineAddr-and-relaxDwarfCFA-t.patch index 4d19f8c..37e9c85 100644 --- a/0006-Backport-LoongArch-Add-relaxDwarfLineAddr-and-relaxDwarfCFA-to-handle-the-mutable-label-diff-in-dwarfinfo.patch +++ b/0007-LoongArch-Add-relaxDwarfLineAddr-and-relaxDwarfCFA-t.patch @@ -11,7 +11,6 @@ diffs. Calculate whether the label diff is mutable. For immutable label diff, return false and do the other works by its parent function. 
(cherry picked from commit ed7f4edc19ada006789318a0929b57d1b5a761bd) -Change-Id: Iae5bad958c6d1a71dac1672f5f03991eaeea6d22 --- llvm/lib/Object/RelocationResolver.cpp | 12 +- .../MCTargetDesc/LoongArchAsmBackend.cpp | 129 ++++++++++++++++++ diff --git a/0007-LoongArch-MC-Add-invalid-immediate-testcases-for-LSX.patch b/0007-LoongArch-MC-Add-invalid-immediate-testcases-for-LSX.patch new file mode 100644 index 0000000..d014321 --- /dev/null +++ b/0007-LoongArch-MC-Add-invalid-immediate-testcases-for-LSX.patch @@ -0,0 +1,1220 @@ +From 91c9df5a4deae4ab63953674880493b9764989ad Mon Sep 17 00:00:00 2001 +From: chenli +Date: Sat, 19 Aug 2023 17:15:19 +0800 +Subject: [PATCH 07/42] [LoongArch][MC] Add invalid immediate testcases for LSX + instructions + +Reviewed By: SixWeining + +Differential Revision: https://reviews.llvm.org/D157573 + +(cherry picked from commit 2f4b6695836e16ec075061cd2508444bd403ad7d) + +--- + llvm/test/MC/LoongArch/lsx/invalid-imm.s | 1149 +++++++++++++++++++++- + 1 file changed, 1143 insertions(+), 6 deletions(-) + +diff --git a/llvm/test/MC/LoongArch/lsx/invalid-imm.s b/llvm/test/MC/LoongArch/lsx/invalid-imm.s +index fb7e24c83488..c3f9aaa08281 100644 +--- a/llvm/test/MC/LoongArch/lsx/invalid-imm.s ++++ b/llvm/test/MC/LoongArch/lsx/invalid-imm.s +@@ -3,53 +3,1190 @@ + # RUN: not llvm-mc --triple=loongarch64 %s 2>&1 | FileCheck %s + + ## uimm1 ++vstelm.d $vr0, $a0, 8, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1] ++ ++vstelm.d $vr0, $a0, 8, 2 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1] ++ ++vreplvei.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1] ++ + vreplvei.d $vr0, $vr1, 2 + # CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1] + ++vpickve2gr.du $a0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 1] ++ ++vpickve2gr.du $a0, $vr1, 2 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 1] ++ ++vpickve2gr.d $a0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 1] ++ ++vpickve2gr.d $a0, $vr1, 2 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 1] ++ ++vinsgr2vr.d $vr0, $a0, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1] ++ ++vinsgr2vr.d $vr0, $a0, 2 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1] ++ ++## uimm2 ++vstelm.w $vr0, $a0, 4, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] ++ ++vstelm.w $vr0, $a0, 4, 4 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] ++ ++vreplvei.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] ++ ++vreplvei.w $vr0, $vr1, 4 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] ++ ++vpickve2gr.wu $a0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 3] ++ ++vpickve2gr.wu $a0, $vr1, 4 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 3] ++ ++vpickve2gr.w $a0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3] ++ ++vpickve2gr.w $a0, $vr1, 4 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3] ++ ++vinsgr2vr.w $vr0, $a0, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate 
must be an integer in the range [0, 3] ++ ++vinsgr2vr.w $vr0, $a0, 4 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] ++ ++## uimm3 ++vstelm.h $vr0, $a0, 2, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] ++ ++vstelm.h $vr0, $a0, 2, 8 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] ++ ++vreplvei.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] ++ ++vreplvei.h $vr0, $vr1, 8 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] ++ ++vpickve2gr.hu $a0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7] ++ ++vpickve2gr.hu $a0, $vr1, 8 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7] ++ ++vpickve2gr.h $a0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] ++ ++vpickve2gr.h $a0, $vr1, 8 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] ++ ++vinsgr2vr.h $vr0, $a0, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] ++ ++vinsgr2vr.h $vr0, $a0, 8 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] ++ ++vbitrevi.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] ++ ++vbitrevi.b $vr0, $vr1, 8 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] ++ ++vbitseti.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] ++ ++vbitseti.b $vr0, $vr1, 8 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] ++ ++vbitclri.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] ++ ++vbitclri.b $vr0, $vr1, 8 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] ++ ++vsrari.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] ++ ++vsrari.b $vr0, $vr1, 8 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] ++ ++vsrlri.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] ++ ++vsrlri.b $vr0, $vr1, 8 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] ++ ++vsllwil.hu.bu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 7] ++ ++vsllwil.hu.bu $vr0, $vr1, 8 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 7] ++ ++vsllwil.h.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] ++ ++vsllwil.h.b $vr0, $vr1, 8 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] ++ ++vrotri.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] ++ ++vrotri.b $vr0, $vr1, 8 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] ++ ++vsrai.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] ++ ++vsrai.b $vr0, $vr1, 8 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] ++ ++vsrli.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] ++ ++vsrli.b $vr0, $vr1, 8 ++# CHECK: 
:[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] ++ ++vslli.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] ++ ++vslli.b $vr0, $vr1, 8 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] ++ ++vsat.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 7] ++ ++vsat.b $vr0, $vr1, 8 ++# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 7] ++ ++vsat.bu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] ++ ++vsat.bu $vr0, $vr1, 8 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] ++ + ## uimm4 ++vstelm.b $vr0, $a0, 1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] ++ ++vstelm.b $vr0, $a0, 1, 16 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] ++ ++vreplvei.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] ++ ++vreplvei.b $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] ++ ++vpickve2gr.bu $a0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++vpickve2gr.bu $a0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++vpickve2gr.b $a0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++vpickve2gr.b $a0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++vinsgr2vr.b $vr0, $a0, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] ++ ++vinsgr2vr.b $vr0, $a0, 16 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] ++ ++vbitrevi.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] ++ ++vbitrevi.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] ++ ++vbitseti.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] ++ ++vbitseti.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] ++ ++vbitclri.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] ++ ++vbitclri.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] ++ ++vssrarni.bu.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] ++ ++vssrarni.bu.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] ++ ++vssrlrni.bu.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] ++ ++vssrlrni.bu.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] ++ ++vssrarni.b.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++vssrarni.b.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++vssrlrni.b.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++vssrlrni.b.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:26: error: immediate 
must be an integer in the range [0, 15] ++ ++vssrani.bu.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++vssrani.bu.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++vssrlni.bu.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++vssrlni.bu.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++vssrani.b.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++vssrani.b.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++vssrlni.b.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++vssrlni.b.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++vsrarni.b.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++vsrarni.b.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++vsrlrni.b.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++vsrlrni.b.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++vsrani.b.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] ++ ++vsrani.b.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] ++ ++vsrlni.b.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] ++ ++vsrlni.b.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] ++ ++vsrari.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] ++ ++vsrari.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] ++ ++vsrlri.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] ++ ++vsrlri.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] ++ ++vsllwil.wu.hu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] ++ ++vsllwil.wu.hu $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] ++ ++vsllwil.w.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++vsllwil.w.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++vrotri.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] ++ ++vrotri.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] ++ ++vsrai.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] ++ ++vsrai.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] ++ ++vsrli.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] ++ ++vsrli.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] ++ 
++vslli.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] ++ ++vslli.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] ++ ++vsat.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 15] ++ + vsat.h $vr0, $vr1, 16 + # CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 15] + ++vsat.hu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] ++ ++vsat.hu $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] ++ ++## uimm5 ++vbsrl.v $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] ++ ++vbsrl.v $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] ++ ++vbsll.v $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] ++ ++vbsll.v $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] ++ ++vslti.du $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vslti.du $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vslti.wu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vslti.wu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vslti.hu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vslti.hu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vslti.bu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vslti.bu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vslei.du $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vslei.du $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vslei.wu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vslei.wu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vslei.hu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vslei.hu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vslei.bu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vslei.bu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vfrstpi.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++vfrstpi.h $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++vfrstpi.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++vfrstpi.b $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++vbitrevi.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the 
range [0, 31] ++ ++vbitrevi.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] ++ ++vbitseti.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] ++ ++vbitseti.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] ++ ++vbitclri.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] ++ ++vbitclri.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] ++ ++vssrarni.hu.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] ++ ++vssrarni.hu.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] ++ ++vssrlrni.hu.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] ++ ++vssrlrni.hu.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] ++ ++vssrarni.h.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++vssrarni.h.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++vssrlrni.h.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++vssrlrni.h.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++vssrani.hu.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++vssrani.hu.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++vssrlni.hu.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++vssrlni.hu.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++vssrani.h.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++vssrani.h.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++vssrlni.h.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++vssrlni.h.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++vsrarni.h.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++vsrarni.h.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++vsrlrni.h.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++vsrlrni.h.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++vsrani.h.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] ++ ++vsrani.h.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] ++ ++vsrlni.h.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] ++ ++vsrlni.h.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] ++ ++vsrari.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 
31] ++ ++vsrari.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vsrlri.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vsrlri.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vsllwil.du.wu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] ++ ++vsllwil.du.wu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] ++ ++vsllwil.d.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++vsllwil.d.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++vrotri.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vrotri.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vsrai.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] ++ ++vsrai.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] ++ ++vsrli.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] ++ ++vsrli.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] ++ ++vslli.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] ++ ++vslli.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] ++ ++vaddi.bu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vaddi.bu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vaddi.hu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vaddi.hu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vaddi.wu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vaddi.wu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vaddi.du $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vaddi.du $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vsubi.bu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vsubi.bu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vsubi.hu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vsubi.hu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vsubi.wu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vsubi.wu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vsubi.du $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vsubi.du $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an 
integer in the range [0, 31] ++ ++vmaxi.bu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vmaxi.bu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vmaxi.hu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vmaxi.hu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vmaxi.wu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vmaxi.wu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vmaxi.du $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vmaxi.du $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vmini.bu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vmini.bu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vmini.hu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vmini.hu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vmini.wu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vmini.wu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vmini.du $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vmini.du $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vsat.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 31] ++ ++vsat.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 31] ++ ++vsat.wu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] ++ ++vsat.wu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] ++ + ## simm5 ++vslti.d $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vslti.d $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vslti.w $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vslti.w $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vslti.h $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vslti.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vslti.b $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vslti.b $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vslei.d $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vslei.d $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vslei.w $vr0, $vr1, -17 ++# CHECK: 
:[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vslei.w $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vslei.h $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vslei.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vslei.b $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vslei.b $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vseqi.d $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vseqi.d $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vseqi.w $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vseqi.w $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vseqi.h $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vseqi.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vseqi.b $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ + vseqi.b $vr0, $vr1, 16 + # CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + ++vmaxi.b $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vmaxi.b $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vmaxi.h $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vmaxi.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vmaxi.w $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vmaxi.w $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vmaxi.d $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vmaxi.d $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vmini.b $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vmini.b $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vmini.h $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vmini.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vmini.w $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vmini.w $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vmini.d $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vmini.d $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++## uimm6 ++vbitrevi.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an 
integer in the range [0, 63] ++ ++vbitrevi.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] ++ ++vbitseti.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] ++ ++vbitseti.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] ++ ++vbitclri.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] ++ ++vbitclri.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] ++ ++vssrarni.wu.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] ++ ++vssrarni.wu.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] ++ ++vssrlrni.wu.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] ++ ++vssrlrni.wu.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] ++ ++vssrarni.w.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] ++ ++vssrarni.w.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] ++ ++vssrlrni.w.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] ++ ++vssrlrni.w.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] ++ ++vssrani.wu.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] ++ ++vssrani.wu.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] ++ ++vssrlni.wu.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] ++ ++vssrlni.wu.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] ++ ++vssrani.w.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++vssrani.w.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++vssrlni.w.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++vssrlni.w.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++vsrarni.w.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++vsrarni.w.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++vsrlrni.w.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++vsrlrni.w.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++vsrani.w.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] ++ ++vsrani.w.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] ++ ++vsrlni.w.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] ++ ++vsrlni.w.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] ++ ++vsrari.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in 
the range [0, 63] ++ ++vsrari.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] ++ ++vsrlri.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] ++ ++vsrlri.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] ++ ++vrotri.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] ++ ++vrotri.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] ++ ++vsrai.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] ++ ++vsrai.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] ++ ++vsrli.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] ++ ++vsrli.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] ++ ++vslli.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] ++ ++vslli.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] ++ ++vsat.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 63] ++ ++vsat.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 63] ++ ++vsat.du $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] ++ ++vsat.du $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] ++ + ## uimm7 ++vssrarni.du.q $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] ++ ++vssrarni.du.q $vr0, $vr1, 128 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] ++ ++vssrlrni.du.q $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] ++ ++vssrlrni.du.q $vr0, $vr1, 128 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] ++ ++vssrarni.d.q $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] ++ ++vssrarni.d.q $vr0, $vr1, 128 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] ++ ++vssrlrni.d.q $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] ++ ++vssrlrni.d.q $vr0, $vr1, 128 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] ++ ++vssrani.du.q $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] ++ ++vssrani.du.q $vr0, $vr1, 128 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] ++ ++vssrlni.du.q $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] ++ ++vssrlni.du.q $vr0, $vr1, 128 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] ++ ++vssrani.d.q $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] ++ ++vssrani.d.q $vr0, $vr1, 128 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] ++ ++vssrlni.d.q $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] ++ 
++vssrlni.d.q $vr0, $vr1, 128 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] ++ ++vsrarni.d.q $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] ++ ++vsrarni.d.q $vr0, $vr1, 128 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] ++ ++vsrlrni.d.q $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] ++ ++vsrlrni.d.q $vr0, $vr1, 128 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] ++ ++vsrani.d.q $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 127] ++ ++vsrani.d.q $vr0, $vr1, 128 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 127] ++ ++vsrlni.d.q $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 127] ++ + vsrlni.d.q $vr0, $vr1, 128 + # CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 127] + +-## simm8 ++## uimm8 ++vextrins.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++vextrins.d $vr0, $vr1, 256 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++vextrins.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++vextrins.w $vr0, $vr1, 256 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++vextrins.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++vextrins.h $vr0, $vr1, 256 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++vextrins.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++vextrins.b $vr0, $vr1, 256 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++vpermi.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255] ++ + vpermi.w $vr0, $vr1, 256 + # CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255] + ++vshuf4i.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] ++ ++vshuf4i.d $vr0, $vr1, 256 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] ++ ++vshuf4i.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] ++ ++vshuf4i.w $vr0, $vr1, 256 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] ++ ++vshuf4i.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] ++ ++vshuf4i.h $vr0, $vr1, 256 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] ++ ++vshuf4i.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] ++ ++vshuf4i.b $vr0, $vr1, 256 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] ++ ++vbitseli.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++vbitseli.b $vr0, $vr1, 256 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++vandi.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range 
[0, 255] ++ ++vandi.b $vr0, $vr1, 256 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] ++ ++vori.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 255] ++ ++vori.b $vr0, $vr1, 256 ++# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 255] ++ ++vxori.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] ++ ++vxori.b $vr0, $vr1, 256 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] ++ ++vnori.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] ++ ++vnori.b $vr0, $vr1, 256 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] ++ ++## simm8 ++vstelm.b $vr0, $a0, -129, 1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-128, 127] ++ ++vstelm.b $vr0, $a0, 128, 1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-128, 127] ++ + ## simm8_lsl1 +-vstelm.h $vr0, $a0, 255, 1 ++vstelm.h $vr0, $a0, -258, 1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 2 in the range [-256, 254] ++ ++vstelm.h $vr0, $a0, 256, 1 + # CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 2 in the range [-256, 254] + + ## simm8_lsl2 +-vstelm.w $vr0, $a0, 512, 1 ++vstelm.w $vr0, $a0, -516, 1 + # CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 4 in the range [-512, 508] + +-## simm10 +-vrepli.b $vr0, 512 +-# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] ++vstelm.w $vr0, $a0, 512, 1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 4 in the range [-512, 508] + + ## simm8_lsl3 ++vstelm.d $vr0, $a0, -1032, 1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 8 in the range [-1024, 1016] ++ + vstelm.d $vr0, $a0, 1024, 1 + # CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 8 in the range [-1024, 1016] + + ## simm9_lsl3 ++vldrepl.d $vr0, $a0, -2056 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 8 in the range [-2048, 2040] ++ + vldrepl.d $vr0, $a0, 2048 + # CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 8 in the range [-2048, 2040] + + ## simm10_lsl2 ++vldrepl.w $vr0, $a0, -2052 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 4 in the range [-2048, 2044] ++ + vldrepl.w $vr0, $a0, 2048 + # CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 4 in the range [-2048, 2044] + ++## simm10 ++vrepli.b $vr0, -513 ++# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] ++ ++vrepli.b $vr0, 512 ++# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] ++ ++vrepli.h $vr0, -513 ++# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] ++ ++vrepli.h $vr0, 512 ++# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] ++ ++vrepli.w $vr0, -513 ++# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] ++ ++vrepli.w $vr0, 512 ++# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] ++ ++vrepli.d $vr0, -513 ++# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] ++ ++vrepli.d $vr0, 512 ++# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] ++ + ## 
simm11_lsl1 ++vldrepl.h $vr0, $a0, -2050 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 2 in the range [-2048, 2046] ++ + vldrepl.h $vr0, $a0, 2048 + # CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 2 in the range [-2048, 2046] + ++## simm12 ++vldrepl.b $vr0, $a0, -2049 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-2048, 2047] ++ ++vldrepl.b $vr0, $a0, 2048 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-2048, 2047] ++ ++vst $vr0, $a0, -2049 ++# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] ++ ++vst $vr0, $a0, 2048 ++# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] ++ ++vld $vr0, $a0, -2049 ++# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] ++ ++vld $vr0, $a0, 2048 ++# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] ++ + ## simm13 ++vldi $vr0, -4097 ++# CHECK: :[[#@LINE-1]]:12: error: immediate must be an integer in the range [-4096, 4095] ++ + vldi $vr0, 4096 + # CHECK: :[[#@LINE-1]]:12: error: immediate must be an integer in the range [-4096, 4095] +-- +2.20.1 + diff --git a/0007-LoongArch-Pre-commit-test-for-76555.-NFC.patch b/0007-LoongArch-Pre-commit-test-for-76555.-NFC.patch new file mode 100644 index 0000000..9cfdc83 --- /dev/null +++ b/0007-LoongArch-Pre-commit-test-for-76555.-NFC.patch @@ -0,0 +1,192 @@ +From 1c81122d5b5a75e9f3d6650bff8e45432da87070 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Tue, 2 Jan 2024 10:57:40 +0800 +Subject: [PATCH 07/23] [LoongArch] Pre-commit test for #76555. NFC + +(cherry picked from commit 3d6fc35b9071009c5ef37f879a12982c6a54db60) +--- + .../LoongArch/psabi-restricted-scheduling.ll | 172 ++++++++++++++++++ + 1 file changed, 172 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll + +diff --git a/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll b/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll +new file mode 100644 +index 000000000000..150a935d7bf8 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll +@@ -0,0 +1,172 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --code-model=medium --post-RA-scheduler=0 < %s \ ++; RUN: | FileCheck %s --check-prefix=MEDIUM_NO_SCH ++; RUN: llc --mtriple=loongarch64 --code-model=medium --post-RA-scheduler=1 < %s \ ++; RUN: | FileCheck %s --check-prefix=MEDIUM_SCH ++; RUN: llc --mtriple=loongarch64 --code-model=large --post-RA-scheduler=0 < %s \ ++; RUN: | FileCheck %s --check-prefix=LARGE_NO_SCH ++; RUN: llc --mtriple=loongarch64 --code-model=large --post-RA-scheduler=1 < %s \ ++; RUN: | FileCheck %s --check-prefix=LARGE_SCH ++ ++;; FIXME: According to the description of the psABI v2.30, the code sequences ++;; of `PseudoLA*_LARGE` instruction and Medium code model's function call must ++;; be adjacent. 
++ ++@g = dso_local global i64 zeroinitializer, align 4 ++@G = global i64 zeroinitializer, align 4 ++@gd = external thread_local global i64 ++@ld = external thread_local(localdynamic) global i64 ++@ie = external thread_local(initialexec) global i64 ++ ++declare ptr @bar(i64) ++ ++define void @foo() nounwind { ++; MEDIUM_NO_SCH-LABEL: foo: ++; MEDIUM_NO_SCH: # %bb.0: ++; MEDIUM_NO_SCH-NEXT: addi.d $sp, $sp, -16 ++; MEDIUM_NO_SCH-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ++; MEDIUM_NO_SCH-NEXT: pcalau12i $a0, %got_pc_hi20(G) ++; MEDIUM_NO_SCH-NEXT: ld.d $a0, $a0, %got_pc_lo12(G) ++; MEDIUM_NO_SCH-NEXT: ld.d $a0, $a0, 0 ++; MEDIUM_NO_SCH-NEXT: pcalau12i $a0, %pc_hi20(g) ++; MEDIUM_NO_SCH-NEXT: addi.d $a0, $a0, %pc_lo12(g) ++; MEDIUM_NO_SCH-NEXT: ld.d $a0, $a0, 0 ++; MEDIUM_NO_SCH-NEXT: ori $a0, $zero, 1 ++; MEDIUM_NO_SCH-NEXT: pcaddu18i $ra, %call36(bar) ++; MEDIUM_NO_SCH-NEXT: jirl $ra, $ra, 0 ++; MEDIUM_NO_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(gd) ++; MEDIUM_NO_SCH-NEXT: ld.d $a0, $a0, %ie_pc_lo12(gd) ++; MEDIUM_NO_SCH-NEXT: ldx.d $a0, $a0, $tp ++; MEDIUM_NO_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ld) ++; MEDIUM_NO_SCH-NEXT: ld.d $a0, $a0, %ie_pc_lo12(ld) ++; MEDIUM_NO_SCH-NEXT: ldx.d $a0, $a0, $tp ++; MEDIUM_NO_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ie) ++; MEDIUM_NO_SCH-NEXT: ld.d $a0, $a0, %ie_pc_lo12(ie) ++; MEDIUM_NO_SCH-NEXT: ldx.d $a0, $a0, $tp ++; MEDIUM_NO_SCH-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ++; MEDIUM_NO_SCH-NEXT: addi.d $sp, $sp, 16 ++; MEDIUM_NO_SCH-NEXT: ret ++; ++; MEDIUM_SCH-LABEL: foo: ++; MEDIUM_SCH: # %bb.0: ++; MEDIUM_SCH-NEXT: addi.d $sp, $sp, -16 ++; MEDIUM_SCH-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ++; MEDIUM_SCH-NEXT: pcalau12i $a0, %got_pc_hi20(G) ++; MEDIUM_SCH-NEXT: pcaddu18i $ra, %call36(bar) ++; MEDIUM_SCH-NEXT: ld.d $a0, $a0, %got_pc_lo12(G) ++; MEDIUM_SCH-NEXT: ld.d $a0, $a0, 0 ++; MEDIUM_SCH-NEXT: pcalau12i $a0, %pc_hi20(g) ++; MEDIUM_SCH-NEXT: addi.d $a0, $a0, %pc_lo12(g) ++; MEDIUM_SCH-NEXT: ld.d $a0, $a0, 0 ++; MEDIUM_SCH-NEXT: ori $a0, $zero, 1 ++; MEDIUM_SCH-NEXT: jirl $ra, $ra, 0 ++; MEDIUM_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(gd) ++; MEDIUM_SCH-NEXT: ld.d $a0, $a0, %ie_pc_lo12(gd) ++; MEDIUM_SCH-NEXT: ldx.d $a0, $a0, $tp ++; MEDIUM_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ld) ++; MEDIUM_SCH-NEXT: ld.d $a0, $a0, %ie_pc_lo12(ld) ++; MEDIUM_SCH-NEXT: ldx.d $a0, $a0, $tp ++; MEDIUM_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ie) ++; MEDIUM_SCH-NEXT: ld.d $a0, $a0, %ie_pc_lo12(ie) ++; MEDIUM_SCH-NEXT: ldx.d $a0, $a0, $tp ++; MEDIUM_SCH-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ++; MEDIUM_SCH-NEXT: addi.d $sp, $sp, 16 ++; MEDIUM_SCH-NEXT: ret ++; ++; LARGE_NO_SCH-LABEL: foo: ++; LARGE_NO_SCH: # %bb.0: ++; LARGE_NO_SCH-NEXT: addi.d $sp, $sp, -16 ++; LARGE_NO_SCH-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ++; LARGE_NO_SCH-NEXT: pcalau12i $a0, %got_pc_hi20(G) ++; LARGE_NO_SCH-NEXT: addi.d $a1, $zero, %got_pc_lo12(G) ++; LARGE_NO_SCH-NEXT: lu32i.d $a1, %got64_pc_lo20(G) ++; LARGE_NO_SCH-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(G) ++; LARGE_NO_SCH-NEXT: ldx.d $a0, $a1, $a0 ++; LARGE_NO_SCH-NEXT: ld.d $a0, $a0, 0 ++; LARGE_NO_SCH-NEXT: pcalau12i $a0, %pc_hi20(g) ++; LARGE_NO_SCH-NEXT: addi.d $a1, $zero, %pc_lo12(g) ++; LARGE_NO_SCH-NEXT: lu32i.d $a1, %pc64_lo20(g) ++; LARGE_NO_SCH-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g) ++; LARGE_NO_SCH-NEXT: add.d $a0, $a1, $a0 ++; LARGE_NO_SCH-NEXT: ld.d $a0, $a0, 0 ++; LARGE_NO_SCH-NEXT: ori $a0, $zero, 1 ++; LARGE_NO_SCH-NEXT: pcalau12i $a1, %got_pc_hi20(bar) ++; LARGE_NO_SCH-NEXT: addi.d $ra, $zero, %got_pc_lo12(bar) 
++; LARGE_NO_SCH-NEXT: lu32i.d $ra, %got64_pc_lo20(bar) ++; LARGE_NO_SCH-NEXT: lu52i.d $ra, $ra, %got64_pc_hi12(bar) ++; LARGE_NO_SCH-NEXT: ldx.d $ra, $ra, $a1 ++; LARGE_NO_SCH-NEXT: jirl $ra, $ra, 0 ++; LARGE_NO_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(gd) ++; LARGE_NO_SCH-NEXT: addi.d $a1, $zero, %ie_pc_lo12(gd) ++; LARGE_NO_SCH-NEXT: lu32i.d $a1, %ie64_pc_lo20(gd) ++; LARGE_NO_SCH-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(gd) ++; LARGE_NO_SCH-NEXT: ldx.d $a0, $a1, $a0 ++; LARGE_NO_SCH-NEXT: ldx.d $a0, $a0, $tp ++; LARGE_NO_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ld) ++; LARGE_NO_SCH-NEXT: addi.d $a1, $zero, %ie_pc_lo12(ld) ++; LARGE_NO_SCH-NEXT: lu32i.d $a1, %ie64_pc_lo20(ld) ++; LARGE_NO_SCH-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(ld) ++; LARGE_NO_SCH-NEXT: ldx.d $a0, $a1, $a0 ++; LARGE_NO_SCH-NEXT: ldx.d $a0, $a0, $tp ++; LARGE_NO_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ie) ++; LARGE_NO_SCH-NEXT: addi.d $a1, $zero, %ie_pc_lo12(ie) ++; LARGE_NO_SCH-NEXT: lu32i.d $a1, %ie64_pc_lo20(ie) ++; LARGE_NO_SCH-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(ie) ++; LARGE_NO_SCH-NEXT: ldx.d $a0, $a1, $a0 ++; LARGE_NO_SCH-NEXT: ldx.d $a0, $a0, $tp ++; LARGE_NO_SCH-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ++; LARGE_NO_SCH-NEXT: addi.d $sp, $sp, 16 ++; LARGE_NO_SCH-NEXT: ret ++; ++; LARGE_SCH-LABEL: foo: ++; LARGE_SCH: # %bb.0: ++; LARGE_SCH-NEXT: addi.d $sp, $sp, -16 ++; LARGE_SCH-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ++; LARGE_SCH-NEXT: addi.d $a1, $zero, %got_pc_lo12(G) ++; LARGE_SCH-NEXT: pcalau12i $a0, %got_pc_hi20(G) ++; LARGE_SCH-NEXT: addi.d $ra, $zero, %got_pc_lo12(bar) ++; LARGE_SCH-NEXT: lu32i.d $a1, %got64_pc_lo20(G) ++; LARGE_SCH-NEXT: lu32i.d $ra, %got64_pc_lo20(bar) ++; LARGE_SCH-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(G) ++; LARGE_SCH-NEXT: lu52i.d $ra, $ra, %got64_pc_hi12(bar) ++; LARGE_SCH-NEXT: ldx.d $a0, $a1, $a0 ++; LARGE_SCH-NEXT: addi.d $a1, $zero, %pc_lo12(g) ++; LARGE_SCH-NEXT: lu32i.d $a1, %pc64_lo20(g) ++; LARGE_SCH-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g) ++; LARGE_SCH-NEXT: ld.d $a0, $a0, 0 ++; LARGE_SCH-NEXT: pcalau12i $a0, %pc_hi20(g) ++; LARGE_SCH-NEXT: add.d $a0, $a1, $a0 ++; LARGE_SCH-NEXT: pcalau12i $a1, %got_pc_hi20(bar) ++; LARGE_SCH-NEXT: ld.d $a0, $a0, 0 ++; LARGE_SCH-NEXT: ldx.d $ra, $ra, $a1 ++; LARGE_SCH-NEXT: ori $a0, $zero, 1 ++; LARGE_SCH-NEXT: jirl $ra, $ra, 0 ++; LARGE_SCH-NEXT: addi.d $a1, $zero, %ie_pc_lo12(gd) ++; LARGE_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(gd) ++; LARGE_SCH-NEXT: lu32i.d $a1, %ie64_pc_lo20(gd) ++; LARGE_SCH-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(gd) ++; LARGE_SCH-NEXT: ldx.d $a0, $a1, $a0 ++; LARGE_SCH-NEXT: addi.d $a1, $zero, %ie_pc_lo12(ld) ++; LARGE_SCH-NEXT: lu32i.d $a1, %ie64_pc_lo20(ld) ++; LARGE_SCH-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(ld) ++; LARGE_SCH-NEXT: ldx.d $a0, $a0, $tp ++; LARGE_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ld) ++; LARGE_SCH-NEXT: ldx.d $a0, $a1, $a0 ++; LARGE_SCH-NEXT: addi.d $a1, $zero, %ie_pc_lo12(ie) ++; LARGE_SCH-NEXT: lu32i.d $a1, %ie64_pc_lo20(ie) ++; LARGE_SCH-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(ie) ++; LARGE_SCH-NEXT: ldx.d $a0, $a0, $tp ++; LARGE_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ie) ++; LARGE_SCH-NEXT: ldx.d $a0, $a1, $a0 ++; LARGE_SCH-NEXT: ldx.d $a0, $a0, $tp ++; LARGE_SCH-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ++; LARGE_SCH-NEXT: addi.d $sp, $sp, 16 ++; LARGE_SCH-NEXT: ret ++ %V = load volatile i64, ptr @G ++ %v = load volatile i64, ptr @g ++ call void @bar(i64 1) ++ %v_gd = load volatile i64, ptr @gd ++ %v_ld = load volatile i64, ptr @ld ++ %v_ie = load volatile i64, ptr @ie ++ ret void ++} +-- 
+2.20.1 + diff --git a/0007-Memory-Call-__clear_cache-in-InvalidateInstructionCa.patch b/0007-Memory-Call-__clear_cache-in-InvalidateInstructionCa.patch new file mode 100644 index 0000000..c954b82 --- /dev/null +++ b/0007-Memory-Call-__clear_cache-in-InvalidateInstructionCa.patch @@ -0,0 +1,35 @@ +From 9ad8624e947a692ec9409fadebd946aaba5dc8f7 Mon Sep 17 00:00:00 2001 +From: Lu Weining +Date: Thu, 19 Oct 2023 10:16:13 +0800 +Subject: [PATCH 7/7] [Memory] Call __clear_cache in InvalidateInstructionCache + on LoongArch (#67285) + +As the comments of `InvalidateInstructionCache`: Before the JIT can run +a block of code that has been emitted it must invalidate the instruction +cache on some platforms. I think it applies to LoongArch as LoongArch +has a weak memory-model. But I'm not able to write a test to demonstrate +this issue. Perhaps self-modifing code should be wrote? + +(cherry picked from commit fb366581e7d67df7d9a98605fd65a7e7908451e7) + +--- + llvm/lib/Support/Unix/Memory.inc | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/llvm/lib/Support/Unix/Memory.inc b/llvm/lib/Support/Unix/Memory.inc +index 4c8f6b2ea7d3..69bd1164343d 100644 +--- a/llvm/lib/Support/Unix/Memory.inc ++++ b/llvm/lib/Support/Unix/Memory.inc +@@ -237,7 +237,8 @@ void Memory::InvalidateInstructionCache(const void *Addr, size_t Len) { + for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize) + asm volatile("icbi 0, %0" : : "r"(Line)); + asm volatile("isync"); +-#elif (defined(__arm__) || defined(__aarch64__) || defined(__mips__)) && \ ++#elif (defined(__arm__) || defined(__aarch64__) || defined(__loongarch__) || \ ++ defined(__mips__)) && \ + defined(__GNUC__) + // FIXME: Can we safely always call this for __GNUC__ everywhere? + const char *Start = static_cast(Addr); +-- +2.20.1 + diff --git a/0007-tsan-Refine-fstat-64-interceptors-86625.patch b/0007-tsan-Refine-fstat-64-interceptors-86625.patch new file mode 100644 index 0000000..3a64dbc --- /dev/null +++ b/0007-tsan-Refine-fstat-64-interceptors-86625.patch @@ -0,0 +1,112 @@ +From 1313f6d8bf7ba44ce712e638f98f72c7dbb4e457 Mon Sep 17 00:00:00 2001 +From: Fangrui Song +Date: Tue, 26 Mar 2024 14:09:39 -0700 +Subject: [PATCH 07/27] [tsan] Refine fstat{,64} interceptors (#86625) + +In glibc versions before 2.33. `libc_nonshared.a` defines +`__fxstat/__fxstat64` but there is no `fstat/fstat64`. glibc 2.33 added +`fstat/fstat64` and obsoleted `__fxstat/__fxstat64`. Ports added after +2.33 do not provide `__fxstat/__fxstat64`, so our `fstat/fstat64` +interceptors using `__fxstat/__fxstat64` interceptors would lead to +runtime failures on such ports (LoongArch and certain RISC-V ports). + +Similar to https://reviews.llvm.org/D118423, refine the conditions that +we define fstat{,64} interceptors. `fstat` is supported by musl/*BSD +while `fstat64` is glibc only. 
+ +(cherry picked from commit d5224b73ccd09a6759759791f58426b6acd4a2e2) +--- + .../lib/tsan/rtl/tsan_interceptors_posix.cpp | 43 ++++++++----------- + 1 file changed, 18 insertions(+), 25 deletions(-) + +diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp +index 177e338bf282..622afc90a577 100644 +--- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp ++++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp +@@ -14,6 +14,7 @@ + + #include "sanitizer_common/sanitizer_atomic.h" + #include "sanitizer_common/sanitizer_errno.h" ++#include "sanitizer_common/sanitizer_glibc_version.h" + #include "sanitizer_common/sanitizer_libc.h" + #include "sanitizer_common/sanitizer_linux.h" + #include "sanitizer_common/sanitizer_platform_limits_netbsd.h" +@@ -1595,47 +1596,40 @@ TSAN_INTERCEPTOR(int, __fxstat, int version, int fd, void *buf) { + FdAccess(thr, pc, fd); + return REAL(__fxstat)(version, fd, buf); + } +-#define TSAN_MAYBE_INTERCEPT___FXSTAT TSAN_INTERCEPT(__fxstat) ++ ++TSAN_INTERCEPTOR(int, __fxstat64, int version, int fd, void *buf) { ++ SCOPED_TSAN_INTERCEPTOR(__fxstat64, version, fd, buf); ++ if (fd > 0) ++ FdAccess(thr, pc, fd); ++ return REAL(__fxstat64)(version, fd, buf); ++} ++#define TSAN_MAYBE_INTERCEPT___FXSTAT TSAN_INTERCEPT(__fxstat); TSAN_INTERCEPT(__fxstat64) + #else + #define TSAN_MAYBE_INTERCEPT___FXSTAT + #endif + ++#if !SANITIZER_GLIBC || __GLIBC_PREREQ(2, 33) + TSAN_INTERCEPTOR(int, fstat, int fd, void *buf) { +-#if SANITIZER_GLIBC +- SCOPED_TSAN_INTERCEPTOR(__fxstat, 0, fd, buf); +- if (fd > 0) +- FdAccess(thr, pc, fd); +- return REAL(__fxstat)(0, fd, buf); +-#else + SCOPED_TSAN_INTERCEPTOR(fstat, fd, buf); + if (fd > 0) + FdAccess(thr, pc, fd); + return REAL(fstat)(fd, buf); +-#endif +-} +- +-#if SANITIZER_GLIBC +-TSAN_INTERCEPTOR(int, __fxstat64, int version, int fd, void *buf) { +- SCOPED_TSAN_INTERCEPTOR(__fxstat64, version, fd, buf); +- if (fd > 0) +- FdAccess(thr, pc, fd); +- return REAL(__fxstat64)(version, fd, buf); + } +-#define TSAN_MAYBE_INTERCEPT___FXSTAT64 TSAN_INTERCEPT(__fxstat64) ++# define TSAN_MAYBE_INTERCEPT_FSTAT TSAN_INTERCEPT(fstat) + #else +-#define TSAN_MAYBE_INTERCEPT___FXSTAT64 ++# define TSAN_MAYBE_INTERCEPT_FSTAT + #endif + +-#if SANITIZER_GLIBC ++#if __GLIBC_PREREQ(2, 33) + TSAN_INTERCEPTOR(int, fstat64, int fd, void *buf) { +- SCOPED_TSAN_INTERCEPTOR(__fxstat64, 0, fd, buf); ++ SCOPED_TSAN_INTERCEPTOR(fstat64, fd, buf); + if (fd > 0) + FdAccess(thr, pc, fd); +- return REAL(__fxstat64)(0, fd, buf); ++ return REAL(fstat64)(fd, buf); + } +-#define TSAN_MAYBE_INTERCEPT_FSTAT64 TSAN_INTERCEPT(fstat64) ++# define TSAN_MAYBE_INTERCEPT_FSTAT64 TSAN_INTERCEPT(fstat64) + #else +-#define TSAN_MAYBE_INTERCEPT_FSTAT64 ++# define TSAN_MAYBE_INTERCEPT_FSTAT64 + #endif + + TSAN_INTERCEPTOR(int, open, const char *name, int oflag, ...) 
{ +@@ -2929,10 +2923,9 @@ void InitializeInterceptors() { + + TSAN_INTERCEPT(pthread_once); + +- TSAN_INTERCEPT(fstat); + TSAN_MAYBE_INTERCEPT___FXSTAT; ++ TSAN_MAYBE_INTERCEPT_FSTAT; + TSAN_MAYBE_INTERCEPT_FSTAT64; +- TSAN_MAYBE_INTERCEPT___FXSTAT64; + TSAN_INTERCEPT(open); + TSAN_MAYBE_INTERCEPT_OPEN64; + TSAN_INTERCEPT(creat); +-- +2.20.1 + diff --git a/0008-Clang-LoongArch-Generate-_mcount-instead-of-mcount-6.patch b/0008-Clang-LoongArch-Generate-_mcount-instead-of-mcount-6.patch new file mode 100644 index 0000000..821beb3 --- /dev/null +++ b/0008-Clang-LoongArch-Generate-_mcount-instead-of-mcount-6.patch @@ -0,0 +1,61 @@ +From 2f22695f7be1f9c5b2a09901efd5a2268ef22c87 Mon Sep 17 00:00:00 2001 +From: Nathan Chancellor +Date: Fri, 8 Sep 2023 10:54:35 -0700 +Subject: [PATCH 08/27] [Clang][LoongArch] Generate _mcount instead of mcount + (#65657) + +When building the LoongArch Linux kernel without +`CONFIG_DYNAMIC_FTRACE`, the build fails to link because the mcount +symbol is `mcount`, not `_mcount` like GCC generates and the kernel +expects: + +``` +ld.lld: error: undefined symbol: mcount +>>> referenced by version.c +>>> init/version.o:(early_hostname) in archive vmlinux.a +>>> referenced by do_mounts.c +>>> init/do_mounts.o:(rootfs_init_fs_context) in archive vmlinux.a +>>> referenced by main.c +>>> init/main.o:(__traceiter_initcall_level) in archive vmlinux.a +>>> referenced 97011 more times +>>> did you mean: _mcount +>>> defined in: vmlinux.a(arch/loongarch/kernel/mcount.o) +``` + +Set `MCountName` in `LoongArchTargetInfo` to `_mcount`, which resolves +the build failure. + +(cherry picked from commit cc2b09bee017147527e7bd1eb5272f4f70a7b900) +--- + clang/lib/Basic/Targets/LoongArch.h | 1 + + clang/test/CodeGen/mcount.c | 2 ++ + 2 files changed, 3 insertions(+) + +diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h +index 8f4150b2539d..3313102492cb 100644 +--- a/clang/lib/Basic/Targets/LoongArch.h ++++ b/clang/lib/Basic/Targets/LoongArch.h +@@ -40,6 +40,7 @@ public: + LongDoubleWidth = 128; + LongDoubleAlign = 128; + LongDoubleFormat = &llvm::APFloat::IEEEquad(); ++ MCountName = "_mcount"; + SuitableAlign = 128; + WCharType = SignedInt; + WIntType = UnsignedInt; +diff --git a/clang/test/CodeGen/mcount.c b/clang/test/CodeGen/mcount.c +index 8f994ab4e754..bdd609c1dfc5 100644 +--- a/clang/test/CodeGen/mcount.c ++++ b/clang/test/CodeGen/mcount.c +@@ -7,6 +7,8 @@ + // RUN: %clang_cc1 -pg -triple x86_64-netbsd -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-DOUBLE-PREFIXED,NO-MCOUNT1 %s + // RUN: %clang_cc1 -pg -triple arm-netbsd-eabi -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-DOUBLE-PREFIXED,NO-MCOUNT1 %s + // RUN: %clang_cc1 -pg -triple aarch64-netbsd -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-DOUBLE-PREFIXED,NO-MCOUNT1 %s ++// RUN: %clang_cc1 -pg -triple loongarch32 -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s ++// RUN: %clang_cc1 -pg -triple loongarch64 -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s + // RUN: %clang_cc1 -pg -triple mips-netbsd -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-DOUBLE-PREFIXED,NO-MCOUNT1 %s + // RUN: %clang_cc1 -pg -triple mips-unknown-gnu-linux -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s + // RUN: %clang_cc1 -pg -triple mipsel-unknown-gnu-linux -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s +-- +2.20.1 + diff --git 
a/0007-Backport-LoongArch-Insert-nops-and-emit-align-reloc-when-handle-alignment-directive.patch b/0008-LoongArch-Insert-nops-and-emit-align-reloc-when-hand.patch similarity index 99% rename from 0007-Backport-LoongArch-Insert-nops-and-emit-align-reloc-when-handle-alignment-directive.patch rename to 0008-LoongArch-Insert-nops-and-emit-align-reloc-when-hand.patch index 9d027af..ce92071 100644 --- a/0007-Backport-LoongArch-Insert-nops-and-emit-align-reloc-when-handle-alignment-directive.patch +++ b/0008-LoongArch-Insert-nops-and-emit-align-reloc-when-hand.patch @@ -15,7 +15,6 @@ lowest 8 bits of addend represent alignment and the other bits of addend represent the maximum number of bytes to emit. (cherry picked from commit c51ab483e6c2d991a01179584705b83fbea1940d) -Change-Id: Iba30702c9dda378acfae0b1f1134926fa838a368 --- llvm/lib/MC/MCExpr.cpp | 2 +- .../MCTargetDesc/LoongArchAsmBackend.cpp | 67 ++++++++++++++++ diff --git a/0008-LoongArch-MC-Add-invalid-immediate-testcases-for-LAS.patch b/0008-LoongArch-MC-Add-invalid-immediate-testcases-for-LAS.patch new file mode 100644 index 0000000..061bc61 --- /dev/null +++ b/0008-LoongArch-MC-Add-invalid-immediate-testcases-for-LAS.patch @@ -0,0 +1,1220 @@ +From fee5433c9cfe7a63735b26f7e0ef2930a78930f1 Mon Sep 17 00:00:00 2001 +From: chenli +Date: Sat, 19 Aug 2023 17:16:09 +0800 +Subject: [PATCH 08/42] [LoongArch][MC] Add invalid immediate testcases for + LASX instructions + +Reviewed By: SixWeining + +Differential Revision: https://reviews.llvm.org/D157574 + +(cherry picked from commit d163ae8c255f663707d4b0d5de03fcb18274b3eb) + +--- + llvm/test/MC/LoongArch/lasx/invalid-imm.s | 1149 ++++++++++++++++++++- + 1 file changed, 1143 insertions(+), 6 deletions(-) + +diff --git a/llvm/test/MC/LoongArch/lasx/invalid-imm.s b/llvm/test/MC/LoongArch/lasx/invalid-imm.s +index 5c61a7a42009..6f64a6f87802 100644 +--- a/llvm/test/MC/LoongArch/lasx/invalid-imm.s ++++ b/llvm/test/MC/LoongArch/lasx/invalid-imm.s +@@ -3,53 +3,1190 @@ + # RUN: not llvm-mc --triple=loongarch64 %s 2>&1 | FileCheck %s + + ## uimm1 ++xvrepl128vei.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 1] ++ + xvrepl128vei.d $xr0, $xr1, 2 + # CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 1] + ++## uimm2 ++xvpickve.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] ++ ++xvpickve.d $xr0, $xr1, 4 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] ++ ++xvinsve0.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] ++ ++xvinsve0.d $xr0, $xr1, 4 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] ++ ++xvinsgr2vr.d $xr0, $a0, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3] ++ ++xvinsgr2vr.d $xr0, $a0, 4 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3] ++ ++xvpickve2gr.d $a0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 3] ++ ++xvpickve2gr.d $a0, $xr1, 4 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 3] ++ ++xvpickve2gr.du $a0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 3] ++ ++xvpickve2gr.du $a0, $xr1, 4 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 3] ++ ++xvstelm.d $xr0, $a0, 8, -1 ++# CHECK: :[[#@LINE-1]]:25: error: 
immediate must be an integer in the range [0, 3] ++ ++xvstelm.d $xr0, $a0, 8, 4 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3] ++ ++xvrepl128vei.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 3] ++ ++xvrepl128vei.w $xr0, $xr1, 4 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 3] ++ ++## uimm3 ++xvpickve.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] ++ ++xvpickve.w $xr0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] ++ ++xvinsve0.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] ++ ++xvinsve0.w $xr0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] ++ ++xvinsgr2vr.w $xr0, $a0, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] ++ ++xvinsgr2vr.w $xr0, $a0, 8 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] ++ ++xvpickve2gr.wu $a0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 7] ++ ++xvpickve2gr.wu $a0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 7] ++ ++xvpickve2gr.w $a0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7] ++ ++xvpickve2gr.w $a0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7] ++ ++xvstelm.w $xr0, $a0, 4, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] ++ ++xvstelm.w $xr0, $a0, 4, 8 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] ++ ++xvrepl128vei.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 7] ++ ++xvrepl128vei.h $xr0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 7] ++ ++xvbitrevi.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] ++ ++xvbitrevi.b $xr0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] ++ ++xvbitseti.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] ++ ++xvbitseti.b $xr0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] ++ ++xvbitclri.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] ++ ++xvbitclri.b $xr0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] ++ ++xvsrari.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7] ++ ++xvsrari.b $xr0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7] ++ ++xvsrlri.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7] ++ ++xvsrlri.b $xr0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7] ++ ++xvsllwil.hu.bu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 7] ++ ++xvsllwil.hu.bu $xr0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 7] ++ ++xvsllwil.h.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the 
range [0, 7] ++ ++xvsllwil.h.b $xr0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7] ++ ++xvrotri.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7] ++ ++xvrotri.b $xr0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7] ++ ++xvsrai.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] ++ ++xvsrai.b $xr0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] ++ ++xvsrli.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] ++ ++xvsrli.b $xr0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] ++ ++xvslli.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] ++ ++xvslli.b $xr0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] ++ ++xvsat.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] ++ ++xvsat.b $xr0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] ++ ++xvsat.bu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] ++ ++xvsat.bu $xr0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] ++ + ## uimm4 ++xvstelm.h $xr0, $a0, 2, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++xvstelm.h $xr0, $a0, 2, 16 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++xvrepl128vei.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15] ++ ++xvrepl128vei.b $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15] ++ ++xvbitrevi.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++xvbitrevi.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++xvbitseti.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++xvbitseti.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++xvbitclri.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++xvbitclri.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++xvssrarni.bu.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15] ++ ++xvssrarni.bu.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15] ++ ++xvssrlrni.bu.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15] ++ ++xvssrlrni.bu.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15] ++ ++xvssrarni.b.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] ++ ++xvssrarni.b.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] ++ ++xvssrlrni.b.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] ++ ++xvssrlrni.b.h 
$xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] ++ ++xvssrani.bu.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] ++ ++xvssrani.bu.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] ++ ++xvssrlni.bu.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] ++ ++xvssrlni.bu.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] ++ ++xvssrani.b.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++xvssrani.b.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++xvssrlni.b.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++xvssrlni.b.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++xvsrarni.b.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++xvsrarni.b.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++xvsrlrni.b.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++xvsrlrni.b.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++xvsrani.b.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++xvsrani.b.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++xvsrlni.b.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++xvsrlni.b.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++xvsrari.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15] ++ ++xvsrari.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15] ++ ++xvsrlri.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15] ++ ++xvsrlri.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15] ++ ++xvsllwil.wu.hu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15] ++ ++xvsllwil.wu.hu $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15] ++ ++xvsllwil.w.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++xvsllwil.w.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++xvrotri.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15] ++ ++xvrotri.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15] ++ ++xvsrai.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] ++ ++xvsrai.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] ++ ++xvsrli.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] ++ ++xvsrli.h $xr0, $xr1, 16 
++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] ++ ++xvslli.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] ++ ++xvslli.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] ++ ++xvsat.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] ++ + xvsat.h $xr0, $xr1, 16 + # CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] + ++xvsat.hu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] ++ ++xvsat.hu $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] ++ ++## uimm5 ++xvstelm.b $xr0, $a0, 1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++xvstelm.b $xr0, $a0, 1, 32 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++xvbsrl.v $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++xvbsrl.v $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++xvbsll.v $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++xvbsll.v $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++xvslti.du $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvslti.du $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvslti.wu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvslti.wu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvslti.hu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvslti.hu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvslti.bu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvslti.bu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvslei.du $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvslei.du $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvslei.wu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvslei.wu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvslei.hu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvslei.hu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvslei.bu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvslei.bu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvfrstpi.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] ++ ++xvfrstpi.h $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in 
the range [0, 31] ++ ++xvfrstpi.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] ++ ++xvfrstpi.b $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] ++ ++xvbitrevi.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++xvbitrevi.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++xvbitseti.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++xvbitseti.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++xvbitclri.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++xvbitclri.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++xvssrarni.hu.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31] ++ ++xvssrarni.hu.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31] ++ ++xvssrlrni.hu.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31] ++ ++xvssrlrni.hu.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31] ++ ++xvssrarni.h.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] ++ ++xvssrarni.h.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] ++ ++xvssrlrni.h.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] ++ ++xvssrlrni.h.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] ++ ++xvssrani.hu.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] ++ ++xvssrani.hu.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] ++ ++xvssrlni.hu.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] ++ ++xvssrlni.hu.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] ++ ++xvssrani.h.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++xvssrani.h.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++xvssrlni.h.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++xvssrlni.h.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++xvsrarni.h.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++xvsrarni.h.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++xvsrlrni.h.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++xvsrlrni.h.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++xvsrani.h.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++xvsrani.h.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:25: error: immediate 
must be an integer in the range [0, 31] ++ ++xvsrlni.h.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++xvsrlni.h.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++xvsrari.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvsrari.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvsrlri.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvsrlri.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvsllwil.du.wu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31] ++ ++xvsllwil.du.wu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31] ++ ++xvsllwil.d.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++xvsllwil.d.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++xvrotri.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvrotri.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvsrai.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++xvsrai.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++xvsrli.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++xvsrli.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++xvslli.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++xvslli.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++xvaddi.bu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvaddi.bu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvaddi.hu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvaddi.hu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvaddi.wu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvaddi.wu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvaddi.du $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvaddi.du $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvsubi.bu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvsubi.bu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvsubi.hu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvsubi.hu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvsubi.wu $xr0, 
$xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvsubi.wu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvsubi.du $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvsubi.du $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvmaxi.bu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvmaxi.bu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvmaxi.hu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvmaxi.hu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvmaxi.wu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvmaxi.wu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvmaxi.du $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvmaxi.du $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvmini.bu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvmini.bu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvmini.hu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvmini.hu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvmini.wu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvmini.wu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvmini.du $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvmini.du $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvsat.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] ++ ++xvsat.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] ++ ++xvsat.wu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++xvsat.wu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ + ## simm5 ++xvslti.d $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvslti.d $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvslti.w $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvslti.w $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvslti.h $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvslti.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvslti.b $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must 
be an integer in the range [-16, 15] ++ ++xvslti.b $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvslei.d $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvslei.d $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvslei.w $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvslei.w $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvslei.h $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvslei.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvslei.b $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvslei.b $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvseqi.d $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvseqi.d $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvseqi.w $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvseqi.w $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvseqi.h $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvseqi.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvseqi.b $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ + xvseqi.b $xr0, $xr1, 16 + # CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] + ++xvmaxi.b $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvmaxi.b $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvmaxi.h $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvmaxi.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvmaxi.w $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvmaxi.w $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvmaxi.d $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvmaxi.d $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvmini.b $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvmini.b $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvmini.h $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvmini.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvmini.w $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range 
[-16, 15] ++ ++xvmini.w $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvmini.d $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvmini.d $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++## uimm6 ++xvbitrevi.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++xvbitrevi.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++xvbitseti.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++xvbitseti.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++xvbitclri.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++xvbitclri.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++xvssrarni.wu.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 63] ++ ++xvssrarni.wu.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 63] ++ ++xvssrlrni.wu.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 63] ++ ++xvssrlrni.wu.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 63] ++ ++xvssrarni.w.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] ++ ++xvssrarni.w.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] ++ ++xvssrlrni.w.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] ++ ++xvssrlrni.w.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] ++ ++xvssrani.wu.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] ++ ++xvssrani.wu.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] ++ ++xvssrlni.wu.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] ++ ++xvssrlni.wu.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] ++ ++xvssrani.w.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] ++ ++xvssrani.w.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] ++ ++xvssrlni.w.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] ++ ++xvssrlni.w.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] ++ ++xvsrarni.w.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] ++ ++xvsrarni.w.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] ++ ++xvsrlrni.w.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] ++ ++xvsrlrni.w.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] ++ ++xvsrani.w.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate 
must be an integer in the range [0, 63] ++ ++xvsrani.w.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++xvsrlni.w.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++xvsrlni.w.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++xvsrari.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63] ++ ++xvsrari.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63] ++ ++xvsrlri.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63] ++ ++xvsrlri.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63] ++ ++xvrotri.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63] ++ ++xvrotri.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63] ++ ++xvsrai.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] ++ ++xvsrai.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] ++ ++xvsrli.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] ++ ++xvsrli.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] ++ ++xvslli.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] ++ ++xvslli.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] ++ ++xvsat.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] ++ ++xvsat.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] ++ ++xvsat.du $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] ++ ++xvsat.du $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] ++ + ## uimm7 ++xvssrarni.du.q $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 127] ++ ++xvssrarni.du.q $xr0, $xr1, 128 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 127] ++ ++xvssrlrni.du.q $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 127] ++ ++xvssrlrni.du.q $xr0, $xr1, 128 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 127] ++ ++xvssrarni.d.q $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] ++ ++xvssrarni.d.q $xr0, $xr1, 128 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] ++ ++xvssrlrni.d.q $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] ++ ++xvssrlrni.d.q $xr0, $xr1, 128 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] ++ ++xvssrani.du.q $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] ++ ++xvssrani.du.q $xr0, $xr1, 128 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] ++ ++xvssrlni.du.q $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must 
be an integer in the range [0, 127] ++ ++xvssrlni.du.q $xr0, $xr1, 128 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] ++ ++xvssrani.d.q $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] ++ ++xvssrani.d.q $xr0, $xr1, 128 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] ++ ++xvssrlni.d.q $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] ++ ++xvssrlni.d.q $xr0, $xr1, 128 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] ++ ++xvsrarni.d.q $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] ++ ++xvsrarni.d.q $xr0, $xr1, 128 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] ++ ++xvsrlrni.d.q $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] ++ ++xvsrlrni.d.q $xr0, $xr1, 128 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] ++ ++xvsrani.d.q $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] ++ ++xvsrani.d.q $xr0, $xr1, 128 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] ++ ++xvsrlni.d.q $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] ++ + xvsrlni.d.q $xr0, $xr1, 128 + # CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] + +-## simm8 ++## uimm8 ++xvextrins.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] ++ ++xvextrins.d $xr0, $xr1, 256 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] ++ ++xvextrins.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] ++ ++xvextrins.w $xr0, $xr1, 256 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] ++ ++xvextrins.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] ++ ++xvextrins.h $xr0, $xr1, 256 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] ++ ++xvextrins.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] ++ ++xvextrins.b $xr0, $xr1, 256 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] ++ ++xvpermi.q $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] ++ ++xvpermi.q $xr0, $xr1, 256 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] ++ ++xvpermi.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] ++ ++xvpermi.d $xr0, $xr1, 256 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] ++ ++xvpermi.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] ++ + xvpermi.w $xr0, $xr1, 256 + # CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] + ++xvshuf4i.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++xvshuf4i.d $xr0, $xr1, 256 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++xvshuf4i.w $xr0, $xr1, -1 ++# 
CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++xvshuf4i.w $xr0, $xr1, 256 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++xvshuf4i.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++xvshuf4i.h $xr0, $xr1, 256 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++xvshuf4i.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++xvshuf4i.b $xr0, $xr1, 256 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++xvbitseli.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] ++ ++xvbitseli.b $xr0, $xr1, 256 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] ++ ++xvandi.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255] ++ ++xvandi.b $xr0, $xr1, 256 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255] ++ ++xvori.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] ++ ++xvori.b $xr0, $xr1, 256 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] ++ ++xvxori.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255] ++ ++xvxori.b $xr0, $xr1, 256 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255] ++ ++xvnori.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255] ++ ++xvnori.b $xr0, $xr1, 256 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255] ++ ++## simm8 ++xvstelm.b $xr0, $a0, -129, 1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-128, 127] ++ ++xvstelm.b $xr0, $a0, 128, 1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-128, 127] ++ + ## simm8_lsl1 +-xvstelm.h $xr0, $a0, 255, 1 ++xvstelm.h $xr0, $a0, -258, 1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 2 in the range [-256, 254] ++ ++xvstelm.h $xr0, $a0, 256, 1 + # CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 2 in the range [-256, 254] + + ## simm8_lsl2 +-xvstelm.w $xr0, $a0, 512, 1 ++xvstelm.w $xr0, $a0, -516, 1 + # CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 4 in the range [-512, 508] + +-## simm10 +-xvrepli.b $xr0, 512 +-# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] ++xvstelm.w $xr0, $a0, 512, 1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 4 in the range [-512, 508] + + ## simm8_lsl3 ++xvstelm.d $xr0, $a0, -1032, 1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 8 in the range [-1024, 1016] ++ + xvstelm.d $xr0, $a0, 1024, 1 + # CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 8 in the range [-1024, 1016] + + ## simm9_lsl3 ++xvldrepl.d $xr0, $a0, -2056 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 8 in the range [-2048, 2040] ++ + xvldrepl.d $xr0, $a0, 2048 + # CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 8 in the range [-2048, 2040] + + ## simm10_lsl2 ++xvldrepl.w $xr0, $a0, -2052 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 4 in the range [-2048, 2044] ++ + xvldrepl.w $xr0, 
$a0, 2048 + # CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 4 in the range [-2048, 2044] + ++## simm10 ++xvrepli.b $xr0, -513 ++# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] ++ ++xvrepli.b $xr0, 512 ++# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] ++ ++xvrepli.h $xr0, -513 ++# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] ++ ++xvrepli.h $xr0, 512 ++# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] ++ ++xvrepli.w $xr0, -513 ++# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] ++ ++xvrepli.w $xr0, 512 ++# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] ++ ++xvrepli.d $xr0, -513 ++# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] ++ ++xvrepli.d $xr0, 512 ++# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] ++ + ## simm11_lsl1 ++xvldrepl.h $xr0, $a0, -2050 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 2 in the range [-2048, 2046] ++ + xvldrepl.h $xr0, $a0, 2048 + # CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 2 in the range [-2048, 2046] + ++## simm12 ++xvldrepl.b $xr0, $a0, -2049 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [-2048, 2047] ++ ++xvldrepl.b $xr0, $a0, 2048 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [-2048, 2047] ++ ++xvst $xr0, $a0, -2049 ++# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047] ++ ++xvst $xr0, $a0, 2048 ++# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047] ++ ++xvld $xr0, $a0, -2049 ++# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047] ++ ++xvld $xr0, $a0, 2048 ++# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047] ++ + ## simm13 ++xvldi $xr0, -4097 ++# CHECK: :[[#@LINE-1]]:13: error: immediate must be an integer in the range [-4096, 4095] ++ + xvldi $xr0, 4096 + # CHECK: :[[#@LINE-1]]:13: error: immediate must be an integer in the range [-4096, 4095] +-- +2.20.1 + diff --git a/0008-LoongArch-test-Remove-the-FIXME-in-psabi-restricted-.patch b/0008-LoongArch-test-Remove-the-FIXME-in-psabi-restricted-.patch new file mode 100644 index 0000000..4c5163f --- /dev/null +++ b/0008-LoongArch-test-Remove-the-FIXME-in-psabi-restricted-.patch @@ -0,0 +1,29 @@ +From 3a1d20e29b554eae102be4339a612bee92cb56ae Mon Sep 17 00:00:00 2001 +From: Weining Lu +Date: Tue, 25 Jun 2024 09:52:17 +0800 +Subject: [PATCH 08/23] [LoongArch][test] Remove the FIXME in + psabi-restricted-scheduling.ll which has been addressed by #76555 + +(cherry picked from commit 7ea63b9db4198688873036f3b0b81f9124076f7a) +--- + llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll b/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll +index 150a935d7bf8..a515939b9c2b 100644 +--- a/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll ++++ b/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll +@@ -8,10 +8,6 @@ + ; RUN: llc --mtriple=loongarch64 --code-model=large --post-RA-scheduler=1 < %s \ + ; RUN: | FileCheck %s --check-prefix=LARGE_SCH + +-;; FIXME: According to the description of the psABI 
v2.30, the code sequences +-;; of `PseudoLA*_LARGE` instruction and Medium code model's function call must +-;; be adjacent. +- + @g = dso_local global i64 zeroinitializer, align 4 + @G = global i64 zeroinitializer, align 4 + @gd = external thread_local global i64 +-- +2.20.1 + diff --git a/0009-ELF-RISCV-Implement-emit-relocs-with-relaxation.patch b/0009-ELF-RISCV-Implement-emit-relocs-with-relaxation.patch new file mode 100644 index 0000000..76ccccf --- /dev/null +++ b/0009-ELF-RISCV-Implement-emit-relocs-with-relaxation.patch @@ -0,0 +1,238 @@ +From d53182c7fcc371f575fd71fa74e28220db6e9b82 Mon Sep 17 00:00:00 2001 +From: Job Noorman +Date: Sat, 9 Sep 2023 10:24:16 +0200 +Subject: [PATCH 09/14] [ELF][RISCV] Implement --emit-relocs with relaxation + +Linker relaxation may change relocations (offsets and types). However, +when --emit-relocs is used, relocations are simply copied from the input +section causing a mismatch with the corresponding (relaxed) code +section. + +This patch fixes this as follows: for non-relocatable RISC-V binaries, +`InputSection::copyRelocations` reads relocations from the relocated +section's `relocations` array (since this gets updated by the relaxation +code). For all other cases, relocations are read from the input section +directly as before. + +In order to reuse as much code as possible, and to keep the diff small, +the original `InputSection::copyRelocations` is changed to accept the +relocations as a range of `Relocation` objects. This means that, in the +general case when reading from the input section, raw relocations need +to be converted to `Relocation`s first, which introduces quite a bit of +boiler plate. It also means there's a slight code size increase due to +the extra instantiations of `copyRelocations` (for both range types). + +Reviewed By: MaskRay + +Differential Revision: https://reviews.llvm.org/D159082 + +(cherry picked from commit 649cac3b627fa3d466b8807536c8be970cc8c32f) +--- + lld/ELF/InputSection.cpp | 56 ++++++++++++++++----- + lld/ELF/InputSection.h | 6 ++- + lld/test/ELF/riscv-relax-emit-relocs.s | 69 ++++++++++++++++++++++++++ + 3 files changed, 117 insertions(+), 14 deletions(-) + create mode 100644 lld/test/ELF/riscv-relax-emit-relocs.s + +diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp +index 2edaa2b40493..1aff6b968d86 100644 +--- a/lld/ELF/InputSection.cpp ++++ b/lld/ELF/InputSection.cpp +@@ -349,29 +349,61 @@ InputSectionBase *InputSection::getRelocatedSection() const { + return sections[info]; + } + ++template ++void InputSection::copyRelocations(uint8_t *buf) { ++ if (config->relax && !config->relocatable && config->emachine == EM_RISCV) { ++ // On RISC-V, relaxation might change relocations: copy from ++ // internal ones that are updated by relaxation. ++ InputSectionBase *sec = getRelocatedSection(); ++ copyRelocations(buf, llvm::make_range(sec->relocations.begin(), ++ sec->relocations.end())); ++ } else { ++ // Convert the raw relocations in the input section into Relocation objects ++ // suitable to be used by copyRelocations below. ++ struct MapRel { ++ const ObjFile &file; ++ Relocation operator()(const RelTy &rel) const { ++ // RelExpr is not used so set to a dummy value. 
++ return Relocation{R_NONE, rel.getType(config->isMips64EL), rel.r_offset, ++ getAddend(rel), &file.getRelocTargetSym(rel)}; ++ } ++ }; ++ ++ using RawRels = ArrayRef; ++ using MapRelIter = ++ llvm::mapped_iterator; ++ auto mapRel = MapRel{*getFile()}; ++ RawRels rawRels = getDataAs(); ++ auto rels = llvm::make_range(MapRelIter(rawRels.begin(), mapRel), ++ MapRelIter(rawRels.end(), mapRel)); ++ copyRelocations(buf, rels); ++ } ++} ++ + // This is used for -r and --emit-relocs. We can't use memcpy to copy + // relocations because we need to update symbol table offset and section index + // for each relocation. So we copy relocations one by one. +-template +-void InputSection::copyRelocations(uint8_t *buf, ArrayRef rels) { ++template ++void InputSection::copyRelocations(uint8_t *buf, ++ llvm::iterator_range rels) { + const TargetInfo &target = *elf::target; + InputSectionBase *sec = getRelocatedSection(); + (void)sec->contentMaybeDecompress(); // uncompress if needed + +- for (const RelTy &rel : rels) { +- RelType type = rel.getType(config->isMips64EL); ++ for (const Relocation &rel : rels) { ++ RelType type = rel.type; + const ObjFile *file = getFile(); +- Symbol &sym = file->getRelocTargetSym(rel); ++ Symbol &sym = *rel.sym; + + auto *p = reinterpret_cast(buf); + buf += sizeof(RelTy); + + if (RelTy::IsRela) +- p->r_addend = getAddend(rel); ++ p->r_addend = rel.addend; + + // Output section VA is zero for -r, so r_offset is an offset within the + // section, but for --emit-relocs it is a virtual address. +- p->r_offset = sec->getVA(rel.r_offset); ++ p->r_offset = sec->getVA(rel.offset); + p->setSymbolAndType(in.symTab->getSymbolIndex(&sym), type, + config->isMips64EL); + +@@ -408,8 +440,8 @@ void InputSection::copyRelocations(uint8_t *buf, ArrayRef rels) { + continue; + } + +- int64_t addend = getAddend(rel); +- const uint8_t *bufLoc = sec->content().begin() + rel.r_offset; ++ int64_t addend = rel.addend; ++ const uint8_t *bufLoc = sec->content().begin() + rel.offset; + if (!RelTy::IsRela) + addend = target.getImplicitAddend(bufLoc, type); + +@@ -432,7 +464,7 @@ void InputSection::copyRelocations(uint8_t *buf, ArrayRef rels) { + if (RelTy::IsRela) + p->r_addend = sym.getVA(addend) - section->getOutputSection()->addr; + else if (config->relocatable && type != target.noneRel) +- sec->addReloc({R_ABS, type, rel.r_offset, addend, &sym}); ++ sec->addReloc({R_ABS, type, rel.offset, addend, &sym}); + } else if (config->emachine == EM_PPC && type == R_PPC_PLTREL24 && + p->r_addend >= 0x8000 && sec->file->ppc32Got2) { + // Similar to R_MIPS_GPREL{16,32}. If the addend of R_PPC_PLTREL24 +@@ -1106,11 +1138,11 @@ template void InputSection::writeTo(uint8_t *buf) { + // If -r or --emit-relocs is given, then an InputSection + // may be a relocation section. 
+ if (LLVM_UNLIKELY(type == SHT_RELA)) { +- copyRelocations(buf, getDataAs()); ++ copyRelocations(buf); + return; + } + if (LLVM_UNLIKELY(type == SHT_REL)) { +- copyRelocations(buf, getDataAs()); ++ copyRelocations(buf); + return; + } + +diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h +index 15122d6abd6b..2b91711abba3 100644 +--- a/lld/ELF/InputSection.h ++++ b/lld/ELF/InputSection.h +@@ -396,8 +396,10 @@ public: + static InputSection discarded; + + private: +- template +- void copyRelocations(uint8_t *buf, llvm::ArrayRef rels); ++ template void copyRelocations(uint8_t *buf); ++ ++ template ++ void copyRelocations(uint8_t *buf, llvm::iterator_range rels); + + template void copyShtGroup(uint8_t *buf); + }; +diff --git a/lld/test/ELF/riscv-relax-emit-relocs.s b/lld/test/ELF/riscv-relax-emit-relocs.s +new file mode 100644 +index 000000000000..ebd69b742d4f +--- /dev/null ++++ b/lld/test/ELF/riscv-relax-emit-relocs.s +@@ -0,0 +1,69 @@ ++# REQUIRES: riscv ++## Test that we can handle --emit-relocs while relaxing. ++ ++# RUN: rm -rf %t && mkdir %t && cd %t ++ ++# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+relax %s -o 32.o ++# RUN: ld.lld -Ttext=0x10000 --emit-relocs 32.o -o 32 ++# RUN: llvm-objdump -dr --no-show-raw-insn -M no-aliases 32 | FileCheck %s ++ ++# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+relax %s -o 64.o ++# RUN: ld.lld -Ttext=0x10000 --emit-relocs 64.o -o 64 ++# RUN: llvm-objdump -dr --no-show-raw-insn -M no-aliases 64 | FileCheck %s ++ ++## -r should keep original relocations. ++# RUN: ld.lld -r 64.o -o 64.r ++# RUN: llvm-objdump -dr --no-show-raw-insn -M no-aliases 64.r | FileCheck %s --check-prefix=CHECKR ++ ++## --no-relax should keep original relocations. ++# RUN: ld.lld --emit-relocs --no-relax 64.o -o 64.norelax ++# RUN: llvm-objdump -dr --no-show-raw-insn -M no-aliases 64.norelax | FileCheck %s --check-prefix=CHECKNORELAX ++ ++# CHECK: <_start>: ++# CHECK-NEXT: jal ra, 0x10008 ++# CHECK-NEXT: R_RISCV_JAL f ++# CHECK-NEXT: R_RISCV_RELAX *ABS* ++# CHECK-NEXT: jal ra, 0x10008 ++# CHECK-NEXT: R_RISCV_JAL f ++# CHECK-NEXT: R_RISCV_RELAX *ABS* ++# CHECK-EMPTY: ++# CHECK-NEXT: : ++# CHECK-NEXT: jalr zero, 0(ra) ++# CHECK-NEXT: R_RISCV_ALIGN *ABS*+0x4 ++ ++# CHECKR: <_start>: ++# CHECKR-NEXT: auipc ra, 0 ++# CHECKR-NEXT: R_RISCV_CALL_PLT f ++# CHECKR-NEXT: R_RISCV_RELAX *ABS* ++# CHECKR-NEXT: jalr ra, 0(ra) ++# CHECKR-NEXT: auipc ra, 0 ++# CHECKR-NEXT: R_RISCV_CALL_PLT f ++# CHECKR-NEXT: R_RISCV_RELAX *ABS* ++# CHECKR-NEXT: jalr ra, 0(ra) ++# CHECKR-NEXT: addi zero, zero, 0 ++# CHECKR-NEXT: R_RISCV_ALIGN *ABS*+0x4 ++# CHECKR-EMPTY: ++# CHECKR-NEXT: : ++# CHECKR-NEXT: jalr zero, 0(ra) ++ ++# CHECKNORELAX: <_start>: ++# CHECKNORELAX-NEXT: auipc ra, 0 ++# CHECKNORELAX-NEXT: R_RISCV_CALL_PLT f ++# CHECKNORELAX-NEXT: R_RISCV_RELAX *ABS* ++# CHECKNORELAX-NEXT: jalr ra, 16(ra) ++# CHECKNORELAX-NEXT: auipc ra, 0 ++# CHECKNORELAX-NEXT: R_RISCV_CALL_PLT f ++# CHECKNORELAX-NEXT: R_RISCV_RELAX *ABS* ++# CHECKNORELAX-NEXT: jalr ra, 8(ra) ++# CHECKNORELAX-EMPTY: ++# CHECKNORELAX-NEXT: : ++# CHECKNORELAX-NEXT: jalr zero, 0(ra) ++# CHECKNORELAX-NEXT: R_RISCV_ALIGN *ABS*+0x4 ++ ++.global _start ++_start: ++ call f ++ call f ++ .balign 8 ++f: ++ ret +-- +2.20.1 + diff --git a/0009-LoongArch-Add-testcases-of-LSX-intrinsics-with-immed.patch b/0009-LoongArch-Add-testcases-of-LSX-intrinsics-with-immed.patch new file mode 100644 index 0000000..7c762dd --- /dev/null +++ b/0009-LoongArch-Add-testcases-of-LSX-intrinsics-with-immed.patch @@ -0,0 +1,5694 @@ +From 
155078666b57e519a22f2700eb7d1a595141954c Mon Sep 17 00:00:00 2001 +From: chenli +Date: Mon, 21 Aug 2023 11:03:49 +0800 +Subject: [PATCH 09/42] [LoongArch] Add testcases of LSX intrinsics with + immediates + +The testcases mainly cover three situations: +- the arguments which should be immediates are non immediates. +- the immediate is out of upper limit of the argument type. +- the immediate is out of lower limit of the argument type. + +Depends on D155829 + +Reviewed By: SixWeining + +Differential Revision: https://reviews.llvm.org/D157570 + +(cherry picked from commit 0c76f46ca676ebecbdf2c9f7e8b05421a234bbed) + +--- + .../lsx/intrinsic-addi-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-addi-non-imm.ll | 37 +++++ + .../lsx/intrinsic-andi-invalid-imm.ll | 17 +++ + .../LoongArch/lsx/intrinsic-andi-non-imm.ll | 10 ++ + .../lsx/intrinsic-bitclr-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-bitclr-non-imm.ll | 37 +++++ + .../lsx/intrinsic-bitrev-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-bitrev-non-imm.ll | 37 +++++ + .../lsx/intrinsic-bitseli-invalid-imm.ll | 17 +++ + .../lsx/intrinsic-bitseli-non-imm.ll | 10 ++ + .../lsx/intrinsic-bitset-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-bitset-non-imm.ll | 37 +++++ + .../lsx/intrinsic-bsll-invalid-imm.ll | 17 +++ + .../LoongArch/lsx/intrinsic-bsll-non-imm.ll | 10 ++ + .../lsx/intrinsic-bsrl-invalid-imm.ll | 17 +++ + .../LoongArch/lsx/intrinsic-bsrl-non-imm.ll | 10 ++ + .../lsx/intrinsic-extrins-invalid-imm.ll | 65 +++++++++ + .../lsx/intrinsic-extrins-non-imm.ll | 37 +++++ + .../lsx/intrinsic-frstp-invalid-imm.ll | 33 +++++ + .../LoongArch/lsx/intrinsic-frstp-non-imm.ll | 19 +++ + .../lsx/intrinsic-insgr2vr-invalid-imm.ll | 65 +++++++++ + .../lsx/intrinsic-insgr2vr-non-imm.ll | 37 +++++ + .../LoongArch/lsx/intrinsic-ld-invalid-imm.ll | 17 +++ + .../LoongArch/lsx/intrinsic-ld-non-imm.ll | 10 ++ + .../lsx/intrinsic-ldi-invalid-imm.ll | 81 +++++++++++ + .../LoongArch/lsx/intrinsic-ldi-non-imm.ll | 46 +++++++ + .../lsx/intrinsic-ldrepl-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-ldrepl-non-imm.ll | 37 +++++ + .../lsx/intrinsic-max-invalid-imm.ll | 129 ++++++++++++++++++ + .../LoongArch/lsx/intrinsic-max-non-imm.ll | 73 ++++++++++ + .../lsx/intrinsic-min-invalid-imm.ll | 129 ++++++++++++++++++ + .../LoongArch/lsx/intrinsic-min-non-imm.ll | 73 ++++++++++ + .../lsx/intrinsic-nori-invalid-imm.ll | 17 +++ + .../LoongArch/lsx/intrinsic-nori-non-imm.ll | 10 ++ + .../lsx/intrinsic-ori-invalid-imm.ll | 17 +++ + .../LoongArch/lsx/intrinsic-ori-non-imm.ll | 10 ++ + .../lsx/intrinsic-permi-invalid-imm.ll | 17 +++ + .../LoongArch/lsx/intrinsic-permi-non-imm.ll | 10 ++ + .../lsx/intrinsic-pickve2gr-invalid-imm.ll | 129 ++++++++++++++++++ + .../lsx/intrinsic-pickve2gr-non-imm.ll | 73 ++++++++++ + .../lsx/intrinsic-replvei-invalid-imm.ll | 65 +++++++++ + .../lsx/intrinsic-replvei-non-imm.ll | 37 +++++ + .../lsx/intrinsic-rotr-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-rotr-non-imm.ll | 37 +++++ + .../lsx/intrinsic-sat-invalid-imm.ll | 129 ++++++++++++++++++ + .../LoongArch/lsx/intrinsic-sat-non-imm.ll | 73 ++++++++++ + .../lsx/intrinsic-seq-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-seq-non-imm.ll | 37 +++++ + .../lsx/intrinsic-shuf4i-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-shuf4i-non-imm.ll | 37 +++++ + .../lsx/intrinsic-sle-invalid-imm.ll | 129 ++++++++++++++++++ + .../LoongArch/lsx/intrinsic-sle-non-imm.ll | 73 ++++++++++ + 
.../lsx/intrinsic-sll-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-sll-non-imm.ll | 37 +++++ + .../lsx/intrinsic-sllwil-invalid-imm.ll | 97 +++++++++++++ + .../LoongArch/lsx/intrinsic-sllwil-non-imm.ll | 55 ++++++++ + .../lsx/intrinsic-slt-invalid-imm.ll | 129 ++++++++++++++++++ + .../LoongArch/lsx/intrinsic-slt-non-imm.ll | 73 ++++++++++ + .../lsx/intrinsic-sra-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-sra-non-imm.ll | 37 +++++ + .../lsx/intrinsic-srani-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-srani-non-imm.ll | 37 +++++ + .../lsx/intrinsic-srar-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-srar-non-imm.ll | 37 +++++ + .../lsx/intrinsic-srarni-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-srarni-non-imm.ll | 37 +++++ + .../lsx/intrinsic-srl-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-srl-non-imm.ll | 37 +++++ + .../lsx/intrinsic-srlni-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-srlni-non-imm.ll | 37 +++++ + .../lsx/intrinsic-srlr-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-srlr-non-imm.ll | 37 +++++ + .../lsx/intrinsic-srlrni-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-srlrni-non-imm.ll | 37 +++++ + .../lsx/intrinsic-ssrani-invalid-imm.ll | 129 ++++++++++++++++++ + .../LoongArch/lsx/intrinsic-ssrani-non-imm.ll | 73 ++++++++++ + .../lsx/intrinsic-ssrarni-invalid-imm.ll | 129 ++++++++++++++++++ + .../lsx/intrinsic-ssrarni-non-imm.ll | 73 ++++++++++ + .../lsx/intrinsic-ssrlni-invalid-imm.ll | 129 ++++++++++++++++++ + .../LoongArch/lsx/intrinsic-ssrlni-non-imm.ll | 73 ++++++++++ + .../lsx/intrinsic-ssrlrni-invalid-imm.ll | 129 ++++++++++++++++++ + .../lsx/intrinsic-ssrlrni-non-imm.ll | 73 ++++++++++ + .../LoongArch/lsx/intrinsic-st-invalid-imm.ll | 17 +++ + .../LoongArch/lsx/intrinsic-st-non-imm.ll | 10 ++ + .../lsx/intrinsic-stelm-invalid-imm.ll | 121 ++++++++++++++++ + .../LoongArch/lsx/intrinsic-stelm-non-imm.ll | 65 +++++++++ + .../lsx/intrinsic-subi-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-subi-non-imm.ll | 37 +++++ + .../lsx/intrinsic-xori-invalid-imm.ll | 17 +++ + .../LoongArch/lsx/intrinsic-xori-non-imm.ll | 10 ++ + 90 files changed, 4949 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-invalid-imm.ll + create mode 100644 
llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-non-imm.ll + create mode 100644 
llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-non-imm.ll + +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-invalid-imm.ll +new file mode 100644 +index 000000000000..6875872b6f83 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vaddi_bu_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vaddi.bu: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vaddi_bu_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vaddi.bu: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> %va, i32 32) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> 
@llvm.loongarch.lsx.vaddi.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vaddi_hu_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vaddi.hu: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vaddi_hu_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vaddi.hu: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> %va, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vaddi_wu_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vaddi.wu: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vaddi_wu_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vaddi.wu: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> %va, i32 32) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vaddi_du_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vaddi.du: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vaddi_du_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vaddi.du: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> %va, i32 32) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-non-imm.ll +new file mode 100644 +index 000000000000..87d32b3ce02a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vaddi_bu(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vaddi_hu(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vaddi_wu(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vaddi_du(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-invalid-imm.ll +new file mode 100644 +index 000000000000..82a117b2aba5 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 
| FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vandi_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vandi.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vandi_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vandi.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> %va, i32 256) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-non-imm.ll +new file mode 100644 +index 000000000000..c0c35c775266 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vandi_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-invalid-imm.ll +new file mode 100644 +index 000000000000..b020806cd86c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbitclri_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitclri.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vbitclri_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitclri.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> %va, i32 8) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vbitclri_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitclri.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vbitclri_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitclri.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> %va, i32 16) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vbitclri_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitclri.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vbitclri_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitclri.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> %va, i32 32) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vbitclri_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitclri.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> 
%va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vbitclri_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitclri.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> %va, i32 64) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-non-imm.ll +new file mode 100644 +index 000000000000..df6cdb99cdbc +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbitclri_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vbitclri_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vbitclri_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vbitclri_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-invalid-imm.ll +new file mode 100644 +index 000000000000..24b6ec3284cb +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbitrevi_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitrevi.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vbitrevi_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitrevi.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> %va, i32 8) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vbitrevi_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitrevi.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vbitrevi_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitrevi.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> %va, i32 16) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vbitrevi_w_lo(<4 x i32> %va) nounwind { ++; CHECK: 
llvm.loongarch.lsx.vbitrevi.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vbitrevi_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitrevi.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> %va, i32 32) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vbitrevi_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitrevi.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vbitrevi_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitrevi.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> %va, i32 64) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-non-imm.ll +new file mode 100644 +index 000000000000..3ffb494c9907 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbitrevi_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vbitrevi_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vbitrevi_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vbitrevi_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-invalid-imm.ll +new file mode 100644 +index 000000000000..bc63b40e9fca +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbitseli_b_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitseli.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> %va, <16 x i8> %vb, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vbitseli_b_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitseli.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> %va, <16 x 
i8> %vb, i32 256) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-non-imm.ll +new file mode 100644 +index 000000000000..52c1eb7d2024 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbitseli_b(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> %va, <16 x i8> %vb, i32 %c) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-invalid-imm.ll +new file mode 100644 +index 000000000000..e57e14d8cb07 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbitseti_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitseti.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vbitseti_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitseti.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> %va, i32 8) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vbitseti_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitseti.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vbitseti_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitseti.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> %va, i32 16) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vbitseti_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitseti.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vbitseti_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitseti.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> %va, i32 32) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vbitseti_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitseti.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vbitseti_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitseti.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> %va, i32 64) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-non-imm.ll +new file mode 100644 +index 
000000000000..9b2bde015ed9 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbitseti_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vbitseti_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vbitseti_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vbitseti_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-invalid-imm.ll +new file mode 100644 +index 000000000000..eb49af49c9be +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbsll_v_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbsll.v: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vbsll_v_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbsll.v: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> %va, i32 32) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-non-imm.ll +new file mode 100644 +index 000000000000..5b10c9e91a4f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbsll_v(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-invalid-imm.ll +new file mode 100644 +index 000000000000..bf56822e2ef5 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbsrl_v_lo(<16 x i8> %va) nounwind { ++; CHECK: 
llvm.loongarch.lsx.vbsrl.v: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vbsrl_v_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbsrl.v: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> %va, i32 32) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-non-imm.ll +new file mode 100644 +index 000000000000..0bc038c869ce +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbsrl_v(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-invalid-imm.ll +new file mode 100644 +index 000000000000..7f94234ed603 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vextrins_b_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vextrins.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> %va, <16 x i8> %vb, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vextrins_b_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vextrins.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> %va, <16 x i8> %vb, i32 256) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vextrins_h_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vextrins.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> %va, <8 x i16> %vb, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vextrins_h_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vextrins.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> %va, <8 x i16> %vb, i32 256) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vextrins_w_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vextrins.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> %va, <4 x i32> %vb, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vextrins_w_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vextrins.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> %va, <4 x i32> %vb, i32 256) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vextrins_d_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vextrins.d: argument 
out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> %va, <2 x i64> %vb, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vextrins_d_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vextrins.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> %va, <2 x i64> %vb, i32 256) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-non-imm.ll +new file mode 100644 +index 000000000000..e834002bb60b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vextrins_b(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> %va, <16 x i8> %vb, i32 %c) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vextrins_h(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> %va, <8 x i16> %vb, i32 %c) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vextrins_w(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> %va, <4 x i32> %vb, i32 %c) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vextrins_d(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> %va, <2 x i64> %vb, i32 %c) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-invalid-imm.ll +new file mode 100644 +index 000000000000..0184c855c9c1 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-invalid-imm.ll +@@ -0,0 +1,33 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vfrstpi_b_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vfrstpi.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> %va, <16 x i8> %vb, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vfrstpi_b_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vfrstpi.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> %va, <16 x i8> %vb, i32 32) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vfrstpi_h_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vfrstpi.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> %va, <8 x i16> %vb, i32 -1) ++ ret <8 x i16> %res ++} ++ 
++define <8 x i16> @lsx_vfrstpi_h_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vfrstpi.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> %va, <8 x i16> %vb, i32 32) ++ ret <8 x i16> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-non-imm.ll +new file mode 100644 +index 000000000000..9583f672a305 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-non-imm.ll +@@ -0,0 +1,19 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vfrstpi_b(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> %va, <16 x i8> %vb, i32 %c) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vfrstpi_h(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> %va, <8 x i16> %vb, i32 %c) ++ ret <8 x i16> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-invalid-imm.ll +new file mode 100644 +index 000000000000..3d4f84fb6e03 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8>, i32, i32) ++ ++define <16 x i8> @lsx_vinsgr2vr_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vinsgr2vr.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> %va, i32 1, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vinsgr2vr_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vinsgr2vr.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> %va, i32 1, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16>, i32, i32) ++ ++define <8 x i16> @lsx_vinsgr2vr_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vinsgr2vr.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> %va, i32 1, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vinsgr2vr_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vinsgr2vr.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> %va, i32 1, i32 8) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32>, i32, i32) ++ ++define <4 x i32> @lsx_vinsgr2vr_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vinsgr2vr.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> %va, i32 1, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vinsgr2vr_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vinsgr2vr.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> %va, i32 1, i32 4) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64>, i64, i32) ++ 
++define <2 x i64> @lsx_vinsgr2vr_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vinsgr2vr.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> %va, i64 1, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vinsgr2vr_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vinsgr2vr.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> %va, i64 1, i32 2) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-non-imm.ll +new file mode 100644 +index 000000000000..2a4c2218de8c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8>, i32, i32) ++ ++define <16 x i8> @lsx_vinsgr2vr_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> %va, i32 1, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16>, i32, i32) ++ ++define <8 x i16> @lsx_vinsgr2vr_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> %va, i32 1, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32>, i32, i32) ++ ++define <4 x i32> @lsx_vinsgr2vr_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> %va, i32 1, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64>, i64, i32) ++ ++define <2 x i64> @lsx_vinsgr2vr_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> %va, i64 1, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-invalid-imm.ll +new file mode 100644 +index 000000000000..3aeb30ce66b4 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vld(i8*, i32) ++ ++define <16 x i8> @lsx_vld_lo(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vld: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vld(i8* %p, i32 -2049) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vld_hi(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vld: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vld(i8* %p, i32 2048) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-non-imm.ll +new file mode 100644 +index 000000000000..db6a0318d87a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vld(i8*, i32) ++ ++define <16 x i8> @lsx_vld(i8* %p, i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate 
parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vld(i8* %p, i32 %a) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-invalid-imm.ll +new file mode 100644 +index 000000000000..57f6f8e81d91 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-invalid-imm.ll +@@ -0,0 +1,81 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <2 x i64> @llvm.loongarch.lsx.vldi(i32) ++ ++define <2 x i64> @lsx_vldi_lo() nounwind { ++; CHECK: llvm.loongarch.lsx.vldi: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vldi(i32 -4097) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vldi_hi() nounwind { ++; CHECK: llvm.loongarch.lsx.vldi: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vldi(i32 4096) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32) ++ ++define <16 x i8> @lsx_vrepli_b_lo() nounwind { ++; CHECK: llvm.loongarch.lsx.vrepli.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 -513) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vrepli_b_hi() nounwind { ++; CHECK: llvm.loongarch.lsx.vrepli.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 512) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32) ++ ++define <8 x i16> @lsx_vrepli_h_lo() nounwind { ++; CHECK: llvm.loongarch.lsx.vrepli.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 -513) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vrepli_h_hi() nounwind { ++; CHECK: llvm.loongarch.lsx.vrepli.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 512) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32) ++ ++define <4 x i32> @lsx_vrepli_w_lo() nounwind { ++; CHECK: llvm.loongarch.lsx.vrepli.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 -513) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vrepli_w_hi() nounwind { ++; CHECK: llvm.loongarch.lsx.vrepli.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 512) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32) ++ ++define <2 x i64> @lsx_vrepli_d_lo() nounwind { ++; CHECK: llvm.loongarch.lsx.vrepli.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 -513) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vrepli_d_hi() nounwind { ++; CHECK: llvm.loongarch.lsx.vrepli.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 512) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-non-imm.ll +new file mode 100644 +index 000000000000..a8f8278f8097 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-non-imm.ll +@@ -0,0 +1,46 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <2 x i64> @llvm.loongarch.lsx.vldi(i32) ++ ++define <2 x i64> @lsx_vldi(i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vldi(i32 %a) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32) ++ ++define <16 x 
i8> @lsx_vrepli_b(i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 %a) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32) ++ ++define <8 x i16> @lsx_vrepli_h(i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 %a) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32) ++ ++define <4 x i32> @lsx_vrepli_w(i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 %a) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32) ++ ++define <2 x i64> @lsx_vrepli_d(i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 %a) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-invalid-imm.ll +new file mode 100644 +index 000000000000..cb640e1245da +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8*, i32) ++ ++define <16 x i8> @lsx_vldrepl_b_lo(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vldrepl.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8* %p, i32 -2049) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vldrepl_b_hi(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vldrepl.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8* %p, i32 2048) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8*, i32) ++ ++define <8 x i16> @lsx_vldrepl_h_lo(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vldrepl.h: argument out of range or not a multiple of 2. ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8* %p, i32 -2050) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vldrepl_h_hi(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vldrepl.h: argument out of range or not a multiple of 2. ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8* %p, i32 2048) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8*, i32) ++ ++define <4 x i32> @lsx_vldrepl_w_lo(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vldrepl.w: argument out of range or not a multiple of 4. ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8* %p, i32 -2052) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vldrepl_w_hi(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vldrepl.w: argument out of range or not a multiple of 4. ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8* %p, i32 2048) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8*, i32) ++ ++define <2 x i64> @lsx_vldrepl_d_lo(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vldrepl.d: argument out of range or not a multiple of 8. ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8* %p, i32 -2056) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vldrepl_d_hi(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vldrepl.d: argument out of range or not a multiple of 8. 
++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8* %p, i32 2048) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-non-imm.ll +new file mode 100644 +index 000000000000..e60b21913c69 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8*, i32) ++ ++define <16 x i8> @lsx_vldrepl_b(i8* %p, i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8* %p, i32 %a) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8*, i32) ++ ++define <8 x i16> @lsx_vldrepl_h(i8* %p, i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8* %p, i32 %a) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8*, i32) ++ ++define <4 x i32> @lsx_vldrepl_w(i8* %p, i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8* %p, i32 %a) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8*, i32) ++ ++define <2 x i64> @lsx_vldrepl_d(i8* %p, i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8* %p, i32 %a) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-invalid-imm.ll +new file mode 100644 +index 000000000000..667ba32723fc +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vmaxi_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmaxi.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> %va, i32 -17) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vmaxi_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmaxi.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> %va, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vmaxi_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmaxi.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> %va, i32 -17) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vmaxi_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmaxi.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> %va, i32 16) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vmaxi_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmaxi.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> %va, i32 -17) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vmaxi_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmaxi.w: argument out of range ++entry: ++ %res = call <4 x i32> 
@llvm.loongarch.lsx.vmaxi.w(<4 x i32> %va, i32 16) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vmaxi_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmaxi.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> %va, i32 -17) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vmaxi_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmaxi.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> %va, i32 16) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vmaxi_bu_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmaxi.bu: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vmaxi_bu_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmaxi.bu: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> %va, i32 32) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vmaxi_hu_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmaxi.hu: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vmaxi_hu_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmaxi.hu: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> %va, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vmaxi_wu_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmaxi.wu: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vmaxi_wu_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmaxi.wu: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> %va, i32 32) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vmaxi_du_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmaxi.du: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vmaxi_du_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmaxi.du: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> %va, i32 32) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-non-imm.ll +new file mode 100644 +index 000000000000..34bbe3495670 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vmaxi_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16>, i32) ++ ++define <8 x i16> 
@lsx_vmaxi_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vmaxi_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vmaxi_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vmaxi_bu(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vmaxi_hu(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vmaxi_wu(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vmaxi_du(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-invalid-imm.ll +new file mode 100644 +index 000000000000..b73bada4f06f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vmini_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmini.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> %va, i32 -17) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vmini_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmini.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> %va, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vmini_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmini.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> %va, i32 -17) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vmini_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmini.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> %va, i32 16) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32>, i32) ++ 
++define <4 x i32> @lsx_vmini_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmini.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> %va, i32 -17) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vmini_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmini.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> %va, i32 16) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vmini_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmini.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> %va, i32 -17) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vmini_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmini.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> %va, i32 16) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vmini_bu_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmini.bu: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vmini_bu_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmini.bu: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> %va, i32 32) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vmini_hu_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmini.hu: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vmini_hu_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmini.hu: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> %va, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vmini_wu_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmini.wu: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vmini_wu_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmini.wu: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> %va, i32 32) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vmini_du_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmini.du: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vmini_du_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmini.du: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> %va, i32 32) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-non-imm.ll +new file mode 100644 +index 000000000000..5d9b98cec4d0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s 
++ ++declare <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vmini_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vmini_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vmini_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vmini_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vmini_bu(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vmini_hu(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vmini_wu(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vmini_du(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-invalid-imm.ll +new file mode 100644 +index 000000000000..8c59d8fb9fa5 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vnori_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vnori.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vnori_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vnori.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> %va, i32 256) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-non-imm.ll +new file mode 100644 +index 000000000000..322a39c106a6 +--- /dev/null ++++ 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vnori_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-invalid-imm.ll +new file mode 100644 +index 000000000000..4a7fc7e109d9 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vori_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vori.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vori_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vori.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> %va, i32 256) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-non-imm.ll +new file mode 100644 +index 000000000000..5644b8581dce +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vori_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-invalid-imm.ll +new file mode 100644 +index 000000000000..e439bbae6130 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vpermi_w_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vpermi.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> %va, <4 x i32> %vb, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vpermi_w_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vpermi.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> %va, <4 x i32> %vb, i32 256) ++ ret <4 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-non-imm.ll +new file mode 100644 +index 000000000000..bdfc08ed680a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vpermi_w(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { ++; 
CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> %va, <4 x i32> %vb, i32 %c) ++ ret <4 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-invalid-imm.ll +new file mode 100644 +index 000000000000..3430c54d2194 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8>, i32) ++ ++define i32 @lsx_vpickve2gr_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vpickve2gr.b: argument out of range ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> %va, i32 -1) ++ ret i32 %res ++} ++ ++define i32 @lsx_vpickve2gr_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vpickve2gr.b: argument out of range ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> %va, i32 16) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16>, i32) ++ ++define i32 @lsx_vpickve2gr_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vpickve2gr.h: argument out of range ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> %va, i32 -1) ++ ret i32 %res ++} ++ ++define i32 @lsx_vpickve2gr_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vpickve2gr.h: argument out of range ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> %va, i32 8) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32>, i32) ++ ++define i32 @lsx_vpickve2gr_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vpickve2gr.w: argument out of range ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> %va, i32 -1) ++ ret i32 %res ++} ++ ++define i32 @lsx_vpickve2gr_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vpickve2gr.w: argument out of range ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> %va, i32 4) ++ ret i32 %res ++} ++ ++declare i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64>, i32) ++ ++define i64 @lsx_vpickve2gr_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vpickve2gr.d: argument out of range ++entry: ++ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> %va, i32 -1) ++ ret i64 %res ++} ++ ++define i64 @lsx_vpickve2gr_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vpickve2gr.d: argument out of range ++entry: ++ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> %va, i32 2) ++ ret i64 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8>, i32) ++ ++define i32 @lsx_vpickve2gr_bu_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vpickve2gr.bu: argument out of range ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> %va, i32 -1) ++ ret i32 %res ++} ++ ++define i32 @lsx_vpickve2gr_bu_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vpickve2gr.bu: argument out of range ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> %va, i32 16) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16>, i32) ++ ++define i32 @lsx_vpickve2gr_hu_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vpickve2gr.hu: argument out of range ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> %va, i32 -1) ++ ret i32 %res ++} ++ ++define i32 
@lsx_vpickve2gr_hu_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vpickve2gr.hu: argument out of range ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> %va, i32 8) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32>, i32) ++ ++define i32 @lsx_vpickve2gr_wu_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vpickve2gr.wu: argument out of range ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> %va, i32 -1) ++ ret i32 %res ++} ++ ++define i32 @lsx_vpickve2gr_wu_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vpickve2gr.wu: argument out of range ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> %va, i32 4) ++ ret i32 %res ++} ++ ++declare i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64>, i32) ++ ++define i64 @lsx_vpickve2gr_du_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vpickve2gr.du: argument out of range ++entry: ++ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> %va, i32 -1) ++ ret i64 %res ++} ++ ++define i64 @lsx_vpickve2gr_du_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vpickve2gr.du: argument out of range ++entry: ++ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> %va, i32 2) ++ ret i64 %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-non-imm.ll +new file mode 100644 +index 000000000000..6dd3c1f27a81 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8>, i32) ++ ++define i32 @lsx_vpickve2gr_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> %va, i32 %b) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16>, i32) ++ ++define i32 @lsx_vpickve2gr_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> %va, i32 %b) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32>, i32) ++ ++define i32 @lsx_vpickve2gr_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> %va, i32 %b) ++ ret i32 %res ++} ++ ++declare i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64>, i32) ++ ++define i64 @lsx_vpickve2gr_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> %va, i32 %b) ++ ret i64 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8>, i32) ++ ++define i32 @lsx_vpickve2gr_bu(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> %va, i32 %b) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16>, i32) ++ ++define i32 @lsx_vpickve2gr_hu(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> %va, i32 %b) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32>, i32) ++ ++define i32 @lsx_vpickve2gr_wu(<4 x i32> %va, i32 
%b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> %va, i32 %b) ++ ret i32 %res ++} ++ ++declare i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64>, i32) ++ ++define i64 @lsx_vpickve2gr_du(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> %va, i32 %b) ++ ret i64 %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-invalid-imm.ll +new file mode 100644 +index 000000000000..d625441122a6 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vreplvei_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vreplvei.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vreplvei_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vreplvei.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> %va, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vreplvei_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vreplvei.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vreplvei_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vreplvei.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> %va, i32 8) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vreplvei_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vreplvei.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vreplvei_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vreplvei.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> %va, i32 4) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vreplvei_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vreplvei.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vreplvei_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vreplvei.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> %va, i32 2) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-non-imm.ll +new file mode 100644 +index 000000000000..3d271bb2b307 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8>, i32) ++ ++define <16 x i8> 
@lsx_vreplvei_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vreplvei_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vreplvei_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vreplvei_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-invalid-imm.ll +new file mode 100644 +index 000000000000..3c53b36672ad +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vrotri_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vrotri.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vrotri_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vrotri.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> %va, i32 8) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vrotri_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vrotri.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vrotri_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vrotri.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> %va, i32 16) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vrotri_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vrotri.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vrotri_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vrotri.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> %va, i32 32) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vrotri_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vrotri.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vrotri_d_hi(<2 x i64> %va) nounwind { ++; CHECK: 
llvm.loongarch.lsx.vrotri.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> %va, i32 64) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-non-imm.ll +new file mode 100644 +index 000000000000..fd8ba3a1c633 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vrotri_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vrotri_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vrotri_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vrotri_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-invalid-imm.ll +new file mode 100644 +index 000000000000..45fa4e43be19 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsat_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsat.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vsat_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsat.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> %va, i32 8) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsat_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsat.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vsat_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsat.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> %va, i32 16) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsat_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsat.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vsat_w_hi(<4 x i32> %va) nounwind { ++; CHECK: 
llvm.loongarch.lsx.vsat.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> %va, i32 32) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsat_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsat.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vsat_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsat.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> %va, i32 64) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsat_bu_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsat.bu: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vsat_bu_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsat.bu: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> %va, i32 8) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsat_hu_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsat.hu: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vsat_hu_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsat.hu: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> %va, i32 16) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsat_wu_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsat.wu: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vsat_wu_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsat.wu: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> %va, i32 32) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsat_du_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsat.du: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vsat_du_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsat.du: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> %va, i32 64) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-non-imm.ll +new file mode 100644 +index 000000000000..afdbe0c1ce0b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsat_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> 
@llvm.loongarch.lsx.vsat.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsat_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsat_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsat_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsat_bu(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsat_hu(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsat_wu(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsat_du(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-invalid-imm.ll +new file mode 100644 +index 000000000000..220398ff28cd +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vseqi_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vseqi.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> %va, i32 -17) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vseqi_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vseqi.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> %va, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vseqi_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vseqi.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> %va, i32 -17) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vseqi_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vseqi.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> %va, i32 16) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> 
@llvm.loongarch.lsx.vseqi.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vseqi_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vseqi.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> %va, i32 -17) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vseqi_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vseqi.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> %va, i32 16) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vseqi_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vseqi.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> %va, i32 -17) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vseqi_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vseqi.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> %va, i32 16) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-non-imm.ll +new file mode 100644 +index 000000000000..5fa1dd30475c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vseqi_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vseqi_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vseqi_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vseqi_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-invalid-imm.ll +new file mode 100644 +index 000000000000..4d6fadf08c26 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vshuf4i_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vshuf4i.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vshuf4i_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vshuf4i.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> %va, i32 256) ++ ret <16 x i8> %res 
++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vshuf4i_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vshuf4i.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vshuf4i_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vshuf4i.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> %va, i32 256) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vshuf4i_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vshuf4i.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vshuf4i_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vshuf4i.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> %va, i32 256) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vshuf4i_d_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vshuf4i.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> %va, <2 x i64> %vb, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vshuf4i_d_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vshuf4i.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> %va, <2 x i64> %vb, i32 256) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-non-imm.ll +new file mode 100644 +index 000000000000..a7d138bcc00b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vshuf4i_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vshuf4i_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vshuf4i_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vshuf4i_d(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> %va, <2 x i64> %vb, i32 %c) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-invalid-imm.ll +new file mode 100644 +index 
000000000000..4c945e296711 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vslei_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslei.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> %va, i32 -17) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vslei_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslei.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> %va, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vslei_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslei.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> %va, i32 -17) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vslei_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslei.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> %va, i32 16) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vslei_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslei.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> %va, i32 -17) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vslei_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslei.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> %va, i32 16) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vslei_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslei.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> %va, i32 -17) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vslei_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslei.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> %va, i32 16) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vslei_bu_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslei.bu: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vslei_bu_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslei.bu: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> %va, i32 32) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vslei_hu_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslei.hu: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vslei_hu_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslei.hu: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> %va, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32>, i32) ++ ++define <4 x i32> 
@lsx_vslei_wu_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslei.wu: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vslei_wu_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslei.wu: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> %va, i32 32) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vslei_du_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslei.du: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vslei_du_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslei.du: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> %va, i32 32) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-non-imm.ll +new file mode 100644 +index 000000000000..0fc137bf0549 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vslei_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vslei_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vslei_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vslei_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vslei_bu(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vslei_hu(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vslei_wu(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64>, i32) ++ 
++define <2 x i64> @lsx_vslei_du(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-invalid-imm.ll +new file mode 100644 +index 000000000000..75406f94887c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vslli_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslli.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vslli_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslli.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> %va, i32 8) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vslli_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslli.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vslli_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslli.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> %va, i32 16) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vslli_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslli.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vslli_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslli.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> %va, i32 32) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vslli_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslli.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vslli_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslli.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> %va, i32 64) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-non-imm.ll +new file mode 100644 +index 000000000000..7474b5e29734 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vslli_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vslli_h(<8 x i16> %va, i32 %b) nounwind 
{ ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vslli_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vslli_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-invalid-imm.ll +new file mode 100644 +index 000000000000..bda3523a0b5c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-invalid-imm.ll +@@ -0,0 +1,97 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8>, i32) ++ ++define <8 x i16> @lsx_vsllwil_h_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsllwil.h.b: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vsllwil_h_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsllwil.h.b: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> %va, i32 8) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16>, i32) ++ ++define <4 x i32> @lsx_vsllwil_w_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsllwil.w.h: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vsllwil_w_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsllwil.w.h: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> %va, i32 16) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32>, i32) ++ ++define <2 x i64> @lsx_vsllwil_d_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsllwil.d.w: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vsllwil_d_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsllwil.d.w: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> %va, i32 32) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8>, i32) ++ ++define <8 x i16> @lsx_vsllwil_hu_bu_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsllwil.hu.bu: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vsllwil_hu_bu_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsllwil.hu.bu: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> %va, i32 8) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16>, i32) ++ ++define <4 x i32> @lsx_vsllwil_wu_hu_lo(<8 x 
i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsllwil.wu.hu: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vsllwil_wu_hu_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsllwil.wu.hu: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> %va, i32 16) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32>, i32) ++ ++define <2 x i64> @lsx_vsllwil_du_wu_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsllwil.du.wu: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vsllwil_du_wu_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsllwil.du.wu: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> %va, i32 32) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-non-imm.ll +new file mode 100644 +index 000000000000..a03656d5ca07 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-non-imm.ll +@@ -0,0 +1,55 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8>, i32) ++ ++define <8 x i16> @lsx_vsllwil_h_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16>, i32) ++ ++define <4 x i32> @lsx_vsllwil_w_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32>, i32) ++ ++define <2 x i64> @lsx_vsllwil_d_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> %va, i32 %b) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8>, i32) ++ ++define <8 x i16> @lsx_vsllwil_hu_bu(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16>, i32) ++ ++define <4 x i32> @lsx_vsllwil_wu_hu(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32>, i32) ++ ++define <2 x i64> @lsx_vsllwil_du_wu(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-invalid-imm.ll +new file mode 100644 +index 000000000000..f6d014b19d6c +--- /dev/null ++++ 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vslti_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslti.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> %va, i32 -17) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vslti_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslti.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> %va, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vslti_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslti.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> %va, i32 -17) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vslti_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslti.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> %va, i32 16) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vslti_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslti.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> %va, i32 -17) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vslti_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslti.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> %va, i32 16) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vslti_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslti.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> %va, i32 -17) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vslti_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslti.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> %va, i32 16) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vslti_bu_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslti.bu: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vslti_bu_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslti.bu: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> %va, i32 32) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vslti_hu_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslti.hu: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vslti_hu_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslti.hu: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> %va, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vslti_wu_lo(<4 x i32> %va) nounwind { ++; CHECK: 
llvm.loongarch.lsx.vslti.wu: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vslti_wu_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslti.wu: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> %va, i32 32) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vslti_du_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslti.du: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vslti_du_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslti.du: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> %va, i32 32) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-non-imm.ll +new file mode 100644 +index 000000000000..9a8b757dab4e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vslti_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vslti_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vslti_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vslti_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vslti_bu(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vslti_hu(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vslti_wu(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vslti_du(<2 x i64> %va, i32 %b) 
nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-invalid-imm.ll +new file mode 100644 +index 000000000000..2a033a21b565 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrai_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrai.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vsrai_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrai.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> %va, i32 8) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrai_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrai.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vsrai_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrai.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> %va, i32 16) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrai_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrai.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vsrai_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrai.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> %va, i32 32) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrai_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrai.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vsrai_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrai.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> %va, i32 64) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-non-imm.ll +new file mode 100644 +index 000000000000..c3b328145864 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrai_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrai_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter 
++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrai_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrai_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-invalid-imm.ll +new file mode 100644 +index 000000000000..d68064e9b902 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrani_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrani.b.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vsrani_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrani.b.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrani_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrani.h.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vsrani_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrani.h.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrani_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrani.w.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vsrani_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrani.w.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrani_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrani.d.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vsrani_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrani.d.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 
x i64> %va, <2 x i64> %vb, i32 128) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-non-imm.ll +new file mode 100644 +index 000000000000..38cfde214dc1 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrani_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrani_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrani_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrani_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-invalid-imm.ll +new file mode 100644 +index 000000000000..b6c2d70cebbc +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrari_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrari.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vsrari_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrari.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> %va, i32 8) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrari_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrari.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vsrari_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrari.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> %va, i32 16) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrari_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrari.w: argument out of range ++entry: ++ %res = call <4 x i32> 
@llvm.loongarch.lsx.vsrari.w(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vsrari_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrari.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> %va, i32 32) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrari_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrari.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vsrari_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrari.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> %va, i32 64) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-non-imm.ll +new file mode 100644 +index 000000000000..2ad8adcd823b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrari_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrari_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrari_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrari_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-invalid-imm.ll +new file mode 100644 +index 000000000000..d24cf92a0392 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrarni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrarni.b.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vsrarni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrarni.b.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define 
<8 x i16> @lsx_vsrarni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrarni.h.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vsrarni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrarni.h.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrarni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrarni.w.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vsrarni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrarni.w.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrarni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrarni.d.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vsrarni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrarni.d.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-non-imm.ll +new file mode 100644 +index 000000000000..19de7445cba1 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrarni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrarni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrarni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrarni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> 
%va, <2 x i64> %vb, i32 %c) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-invalid-imm.ll +new file mode 100644 +index 000000000000..3beff790afab +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrli_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrli.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vsrli_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrli.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> %va, i32 8) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrli_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrli.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vsrli_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrli.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> %va, i32 16) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrli_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrli.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vsrli_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrli.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> %va, i32 32) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrli_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrli.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vsrli_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrli.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> %va, i32 64) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-non-imm.ll +new file mode 100644 +index 000000000000..98652aca0d62 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrli_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrli_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ 
++declare <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrli_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrli_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-invalid-imm.ll +new file mode 100644 +index 000000000000..054c4f393548 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrlni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlni.b.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vsrlni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlni.b.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrlni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlni.h.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vsrlni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlni.h.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrlni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlni.w.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vsrlni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlni.w.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrlni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlni.d.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vsrlni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlni.d.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) ++ ret <2 x i64> %res ++} +diff --git 
a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-non-imm.ll +new file mode 100644 +index 000000000000..76341df197fd +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrlni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrlni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrlni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrlni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-invalid-imm.ll +new file mode 100644 +index 000000000000..bcbd38e26e5f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrlri_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlri.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vsrlri_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlri.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> %va, i32 8) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrlri_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlri.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vsrlri_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlri.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> %va, i32 16) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrlri_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlri.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> 
@lsx_vsrlri_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlri.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> %va, i32 32) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrlri_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlri.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vsrlri_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlri.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> %va, i32 64) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-non-imm.ll +new file mode 100644 +index 000000000000..4862b1546ccf +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrlri_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrlri_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrlri_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrlri_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-invalid-imm.ll +new file mode 100644 +index 000000000000..8988ae88f9eb +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrlrni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlrni.b.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vsrlrni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlrni.b.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrlrni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: 
llvm.loongarch.lsx.vsrlrni.h.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vsrlrni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlrni.h.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrlrni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlrni.w.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vsrlrni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlrni.w.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrlrni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlrni.d.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vsrlrni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlrni.d.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-non-imm.ll +new file mode 100644 +index 000000000000..e5530db56fed +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrlrni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrlrni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrlrni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrlrni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) ++ ret <2 x i64> %res ++} +diff --git 
a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-invalid-imm.ll +new file mode 100644 +index 000000000000..f7817921ebeb +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrani_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrani.b.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vssrani_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrani.b.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrani_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrani.h.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vssrani_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrani.h.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrani_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrani.w.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vssrani_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrani.w.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrani_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrani.d.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vssrani_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrani.d.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrani_bu_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrani.bu.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vssrani_bu_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrani.bu.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 16) ++ ret <16 x 
i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrani_hu_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrani.hu.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vssrani_hu_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrani.hu.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrani_wu_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrani.wu.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vssrani_wu_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrani.wu.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 64) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrani_du_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrani.du.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vssrani_du_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrani.du.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> %va, <2 x i64> %vb, i32 128) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-non-imm.ll +new file mode 100644 +index 000000000000..a80ede9c5243 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrani_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrani_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrani_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrani_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { ++; 
CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrani_bu_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrani_hu_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrani_wu_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrani_du_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-invalid-imm.ll +new file mode 100644 +index 000000000000..4edda8c0a24a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrarni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrarni.b.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vssrarni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrarni.b.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrarni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrarni.h.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vssrarni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrarni.h.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrarni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrarni.w.d: argument out of range ++entry: ++ %res = call <4 x i32> 
@llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vssrarni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrarni.w.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrarni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrarni.d.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vssrarni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrarni.d.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrarni_bu_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrarni.bu.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vssrarni_bu_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrarni.bu.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrarni_hu_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrarni.hu.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vssrarni_hu_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrarni.hu.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrarni_wu_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrarni.wu.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vssrarni_wu_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrarni.wu.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 64) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrarni_du_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrarni.du.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vssrarni_du_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrarni.du.q: argument out of range ++entry: ++ %res = call <2 x i64> 
@llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 128) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-non-imm.ll +new file mode 100644 +index 000000000000..a77e6e764c9d +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrarni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrarni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrarni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrarni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrarni_bu_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrarni_hu_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrarni_wu_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrarni_du_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-invalid-imm.ll +new file mode 100644 +index 000000000000..6218af1fa773 +--- /dev/null 
++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrlni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlni.b.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vssrlni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlni.b.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrlni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlni.h.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vssrlni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlni.h.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrlni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlni.w.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vssrlni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlni.w.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrlni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlni.d.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vssrlni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlni.d.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrlni_bu_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlni.bu.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vssrlni_bu_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlni.bu.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrlni_hu_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: 
llvm.loongarch.lsx.vssrlni.hu.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vssrlni_hu_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlni.hu.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrlni_wu_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlni.wu.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vssrlni_wu_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlni.wu.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 64) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrlni_du_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlni.du.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vssrlni_du_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlni.du.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 128) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-non-imm.ll +new file mode 100644 +index 000000000000..688be826f467 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrlni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrlni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrlni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrlni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) ++ ret <2 x i64> %res ++} ++ 
++declare <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrlni_bu_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrlni_hu_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrlni_wu_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrlni_du_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-invalid-imm.ll +new file mode 100644 +index 000000000000..98a0c5b3cd28 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrlrni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlrni.b.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vssrlrni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlrni.b.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrlrni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlrni.h.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vssrlrni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlrni.h.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrlrni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlrni.w.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vssrlrni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; 
CHECK: llvm.loongarch.lsx.vssrlrni.w.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrlrni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlrni.d.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vssrlrni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlrni.d.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrlrni_bu_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlrni.bu.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vssrlrni_bu_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlrni.bu.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrlrni_hu_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlrni.hu.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vssrlrni_hu_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlrni.hu.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrlrni_wu_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlrni.wu.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vssrlrni_wu_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlrni.wu.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 64) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrlrni_du_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlrni.du.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vssrlrni_du_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlrni.du.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 128) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-non-imm.ll 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-non-imm.ll +new file mode 100644 +index 000000000000..c389b4fd6023 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrlrni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrlrni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrlrni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrlrni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrlrni_bu_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrlrni_hu_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrlrni_wu_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrlrni_du_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-invalid-imm.ll +new file mode 100644 +index 000000000000..64518380964b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare void 
@llvm.loongarch.lsx.vst(<16 x i8>, i8*, i32) ++ ++define void @lsx_vst_lo(<16 x i8> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vst: argument out of range ++entry: ++ call void @llvm.loongarch.lsx.vst(<16 x i8> %va, i8* %p, i32 -2049) ++ ret void ++} ++ ++define void @lsx_vst_hi(<16 x i8> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vst: argument out of range ++entry: ++ call void @llvm.loongarch.lsx.vst(<16 x i8> %va, i8* %p, i32 2048) ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-non-imm.ll +new file mode 100644 +index 000000000000..119ed9b78658 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare void @llvm.loongarch.lsx.vst(<16 x i8>, i8*, i32) ++ ++define void @lsx_vst(<16 x i8> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lsx.vst(<16 x i8> %va, i8* %p, i32 %b) ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-invalid-imm.ll +new file mode 100644 +index 000000000000..277abcbd34cc +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-invalid-imm.ll +@@ -0,0 +1,121 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare void @llvm.loongarch.lsx.vstelm.b(<16 x i8>, i8*, i32, i32) ++ ++define void @lsx_vstelm_b_lo(<16 x i8> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vstelm.b: argument out of range ++entry: ++ call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 -129, i32 15) ++ ret void ++} ++ ++define void @lsx_vstelm_b_hi(<16 x i8> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vstelm.b: argument out of range ++entry: ++ call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 128, i32 15) ++ ret void ++} ++ ++define void @lsx_vstelm_b_idx_lo(<16 x i8> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vstelm.b: argument out of range ++entry: ++ call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 1, i32 -1) ++ ret void ++} ++ ++define void @lsx_vstelm_b_idx_hi(<16 x i8> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vstelm.b: argument out of range ++entry: ++ call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 1, i32 16) ++ ret void ++} ++ ++declare void @llvm.loongarch.lsx.vstelm.h(<8 x i16>, i8*, i32, i32) ++ ++define void @lsx_vstelm_h_lo(<8 x i16> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vstelm.h: argument out of range or not a multiple of 2. ++entry: ++ call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 -258, i32 7) ++ ret void ++} ++ ++define void @lsx_vstelm_h_hi(<8 x i16> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vstelm.h: argument out of range or not a multiple of 2. ++entry: ++ call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 256, i32 7) ++ ret void ++} ++ ++define void @lsx_vstelm_h_idx_lo(<8 x i16> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vstelm.h: argument out of range or not a multiple of 2. ++entry: ++ call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 2, i32 -1) ++ ret void ++} ++ ++define void @lsx_vstelm_h_idx_hi(<8 x i16> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vstelm.h: argument out of range or not a multiple of 2. 
++entry: ++ call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 2, i32 8) ++ ret void ++} ++ ++declare void @llvm.loongarch.lsx.vstelm.w(<4 x i32>, i8*, i32, i32) ++ ++define void @lsx_vstelm_w_lo(<4 x i32> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vstelm.w: argument out of range or not a multiple of 4. ++entry: ++ call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 -516, i32 3) ++ ret void ++} ++ ++define void @lsx_vstelm_w_hi(<4 x i32> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vstelm.w: argument out of range or not a multiple of 4. ++entry: ++ call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 512, i32 3) ++ ret void ++} ++ ++define void @lsx_vstelm_w_idx_lo(<4 x i32> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vstelm.w: argument out of range or not a multiple of 4. ++entry: ++ call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 4, i32 -1) ++ ret void ++} ++ ++define void @lsx_vstelm_w_idx_hi(<4 x i32> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vstelm.w: argument out of range or not a multiple of 4. ++entry: ++ call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 4, i32 4) ++ ret void ++} ++ ++declare void @llvm.loongarch.lsx.vstelm.d(<2 x i64>, i8*, i32, i32) ++ ++define void @lsx_vstelm_d_lo(<2 x i64> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vstelm.d: argument out of range or not a multiple of 8. ++entry: ++ call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 -1032, i32 1) ++ ret void ++} ++ ++define void @lsx_vstelm_d_hi(<2 x i64> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vstelm.d: argument out of range or not a multiple of 8. ++entry: ++ call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 1024, i32 1) ++ ret void ++} ++ ++define void @lsx_vstelm_d_idx_lo(<2 x i64> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vstelm.d: argument out of range or not a multiple of 8. ++entry: ++ call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 8, i32 -1) ++ ret void ++} ++ ++define void @lsx_vstelm_d_idx_hi(<2 x i64> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vstelm.d: argument out of range or not a multiple of 8. 
++entry: ++ call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 8, i32 2) ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-non-imm.ll +new file mode 100644 +index 000000000000..f53932f79035 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-non-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare void @llvm.loongarch.lsx.vstelm.b(<16 x i8>, i8*, i32, i32) ++ ++define void @lsx_vstelm_b(<16 x i8> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 %b, i32 1) ++ ret void ++} ++ ++define void @lsx_vstelm_b_idx(<16 x i8> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 1, i32 %b) ++ ret void ++} ++ ++declare void @llvm.loongarch.lsx.vstelm.h(<8 x i16>, i8*, i32, i32) ++ ++define void @lsx_vstelm_h(<8 x i16> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 %b, i32 1) ++ ret void ++} ++ ++define void @lsx_vstelm_h_idx(<8 x i16> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 2, i32 %b) ++ ret void ++} ++ ++declare void @llvm.loongarch.lsx.vstelm.w(<4 x i32>, i8*, i32, i32) ++ ++define void @lsx_vstelm_w(<4 x i32> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 %b, i32 1) ++ ret void ++} ++ ++define void @lsx_vstelm_w_idx(<4 x i32> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 4, i32 %b) ++ ret void ++} ++ ++declare void @llvm.loongarch.lsx.vstelm.d(<2 x i64>, i8*, i32, i32) ++ ++define void @lsx_vstelm_d(<2 x i64> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 %b, i32 1) ++ ret void ++} ++ ++define void @lsx_vstelm_d_idx(<2 x i64> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 8, i32 %b) ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-invalid-imm.ll +new file mode 100644 +index 000000000000..96cc1241fbf3 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsubi_bu_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsubi.bu: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vsubi_bu_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsubi.bu: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> %va, i32 32) ++ ret <16 x i8> %res ++} 
++ ++declare <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsubi_hu_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsubi.hu: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vsubi_hu_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsubi.hu: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> %va, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsubi_wu_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsubi.wu: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vsubi_wu_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsubi.wu: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> %va, i32 32) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsubi_du_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsubi.du: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vsubi_du_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsubi.du: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> %va, i32 32) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-non-imm.ll +new file mode 100644 +index 000000000000..162f9ad131c7 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsubi_bu(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsubi_hu(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsubi_wu(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsubi_du(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-invalid-imm.ll +new file mode 100644 +index 000000000000..5f5613189ac8 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 
--mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vxori_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vxori.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vxori_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vxori.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> %va, i32 256) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-non-imm.ll +new file mode 100644 +index 000000000000..4238d89120f1 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vxori_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} +-- +2.20.1 + diff --git a/0009-LoongArch-Fix-td-pattern-for-CACOP-LDPTE-and-LDDIR.patch b/0009-LoongArch-Fix-td-pattern-for-CACOP-LDPTE-and-LDDIR.patch new file mode 100644 index 0000000..1d0dbff --- /dev/null +++ b/0009-LoongArch-Fix-td-pattern-for-CACOP-LDPTE-and-LDDIR.patch @@ -0,0 +1,43 @@ +From 1708e19c1f2ad47bb8bd83c0a234ed7843fa4e8d Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Fri, 20 Oct 2023 10:44:55 +0800 +Subject: [PATCH 09/27] [LoongArch] Fix td pattern for CACOP LDPTE and LDDIR + +The immediate argument should be a target constant (`timm`). 
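+
+(Illustrative aside, not part of the upstream change: in ISel patterns a plain
+`imm`-style operand matches an ordinary constant node, while `timm` matches a
+`TargetConstant`, which is what an intrinsic's `ImmArg` argument is lowered to.
+A minimal sketch with a made-up intrinsic `int_foo` and instruction `FOO`:
+
+  // Hypothetical pattern; `timm` keeps the argument as an immediate operand.
+  def : Pat<(int_foo GPR:$rj, timm:$imm8), (FOO GPR:$rj, timm:$imm8)>;
+
+mirroring the CACOP/LDDIR/LDPTE patterns fixed below.)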
+ +(cherry picked from commit 47826b3f148996767ebd2c67ee41c329cb364fef) +--- + llvm/lib/Target/LoongArch/LoongArchInstrInfo.td | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +index b2c4bb812ba5..166379d7d592 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +@@ -1857,9 +1857,9 @@ defm : PseudoBinPat<"atomic_load_xor_32", PseudoAtomicLoadXor32>; + /// Intrinsics + + def : Pat<(int_loongarch_cacop_d timm:$op, i64:$rj, timm:$imm12), +- (CACOP uimm5:$op, GPR:$rj, simm12:$imm12)>; ++ (CACOP timm:$op, GPR:$rj, timm:$imm12)>; + def : Pat<(int_loongarch_cacop_w i32:$op, i32:$rj, i32:$imm12), +- (CACOP uimm5:$op, GPR:$rj, simm12:$imm12)>; ++ (CACOP timm:$op, GPR:$rj, timm:$imm12)>; + def : Pat<(loongarch_dbar uimm15:$imm15), (DBAR uimm15:$imm15)>; + def : Pat<(loongarch_ibar uimm15:$imm15), (IBAR uimm15:$imm15)>; + def : Pat<(loongarch_break uimm15:$imm15), (BREAK uimm15:$imm15)>; +@@ -2023,9 +2023,9 @@ def : Pat<(int_loongarch_asrtle_d GPR:$rj, GPR:$rk), + def : Pat<(int_loongarch_asrtgt_d GPR:$rj, GPR:$rk), + (ASRTGT_D GPR:$rj, GPR:$rk)>; + def : Pat<(int_loongarch_lddir_d GPR:$rj, timm:$imm8), +- (LDDIR GPR:$rj, uimm8:$imm8)>; ++ (LDDIR GPR:$rj, timm:$imm8)>; + def : Pat<(int_loongarch_ldpte_d GPR:$rj, timm:$imm8), +- (LDPTE GPR:$rj, uimm8:$imm8)>; ++ (LDPTE GPR:$rj, timm:$imm8)>; + } // Predicates = [IsLA64] + + //===----------------------------------------------------------------------===// +-- +2.20.1 + diff --git a/0009-LoongArch-Reimplement-the-expansion-of-PseudoLA-_LAR.patch b/0009-LoongArch-Reimplement-the-expansion-of-PseudoLA-_LAR.patch new file mode 100644 index 0000000..aef81c5 --- /dev/null +++ b/0009-LoongArch-Reimplement-the-expansion-of-PseudoLA-_LAR.patch @@ -0,0 +1,1278 @@ +From c6e931abb8617341ab49d85f80e99fd2c328ddca Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Tue, 2 Jan 2024 10:57:15 +0800 +Subject: [PATCH 09/23] [LoongArch] Reimplement the expansion of + PseudoLA*_LARGE instructions (#76555) + +According to the description of the psABI v2.30: +https://github.com/loongson/la-abi-specs/releases/tag/v2.30, moved the +expansion of relevant pseudo-instructions from +`LoongArchPreRAExpandPseudo` pass to `LoongArchExpandPseudo` pass, to +ensure that the code sequences of `PseudoLA*_LARGE` instructions and +Medium code model's function call are not scheduled. 
+ +(cherry picked from commit c56a5e895a96fec4292e9333d998cfa88770432a) +--- + .../LoongArch/LoongArchExpandPseudoInsts.cpp | 519 +++++++++--------- + .../LoongArch/LoongArchISelLowering.cpp | 24 +- + .../Target/LoongArch/LoongArchISelLowering.h | 4 + + .../Target/LoongArch/LoongArchInstrInfo.td | 83 ++- + llvm/test/CodeGen/LoongArch/code-models.ll | 36 +- + llvm/test/CodeGen/LoongArch/expand-call.ll | 2 +- + llvm/test/CodeGen/LoongArch/global-address.ll | 32 +- + .../LoongArch/psabi-restricted-scheduling.ll | 102 ++-- + llvm/test/CodeGen/LoongArch/tls-models.ll | 68 +-- + 9 files changed, 487 insertions(+), 383 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp +index 8eda2dcc1633..f977f176066a 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp +@@ -62,43 +62,24 @@ private: + MachineBasicBlock::iterator &NextMBBI, + unsigned FlagsHi, unsigned SecondOpcode, + unsigned FlagsLo); +- bool expandLargeAddressLoad(MachineBasicBlock &MBB, +- MachineBasicBlock::iterator MBBI, +- MachineBasicBlock::iterator &NextMBBI, +- unsigned LastOpcode, unsigned IdentifyingMO); +- bool expandLargeAddressLoad(MachineBasicBlock &MBB, +- MachineBasicBlock::iterator MBBI, +- MachineBasicBlock::iterator &NextMBBI, +- unsigned LastOpcode, unsigned IdentifyingMO, +- const MachineOperand &Symbol, Register DestReg, +- bool EraseFromParent); + bool expandLoadAddressPcrel(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, +- MachineBasicBlock::iterator &NextMBBI, +- bool Large = false); ++ MachineBasicBlock::iterator &NextMBBI); + bool expandLoadAddressGot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, +- MachineBasicBlock::iterator &NextMBBI, +- bool Large = false); ++ MachineBasicBlock::iterator &NextMBBI); + bool expandLoadAddressTLSLE(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandLoadAddressTLSIE(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, +- MachineBasicBlock::iterator &NextMBBI, +- bool Large = false); ++ MachineBasicBlock::iterator &NextMBBI); + bool expandLoadAddressTLSLD(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, +- MachineBasicBlock::iterator &NextMBBI, +- bool Large = false); ++ MachineBasicBlock::iterator &NextMBBI); + bool expandLoadAddressTLSGD(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, +- MachineBasicBlock::iterator &NextMBBI, +- bool Large = false); +- bool expandFunctionCALL(MachineBasicBlock &MBB, +- MachineBasicBlock::iterator MBBI, +- MachineBasicBlock::iterator &NextMBBI, +- bool IsTailCall); ++ MachineBasicBlock::iterator &NextMBBI); + }; + + char LoongArchPreRAExpandPseudo::ID = 0; +@@ -131,30 +112,16 @@ bool LoongArchPreRAExpandPseudo::expandMI( + switch (MBBI->getOpcode()) { + case LoongArch::PseudoLA_PCREL: + return expandLoadAddressPcrel(MBB, MBBI, NextMBBI); +- case LoongArch::PseudoLA_PCREL_LARGE: +- return expandLoadAddressPcrel(MBB, MBBI, NextMBBI, /*Large=*/true); + case LoongArch::PseudoLA_GOT: + return expandLoadAddressGot(MBB, MBBI, NextMBBI); +- case LoongArch::PseudoLA_GOT_LARGE: +- return expandLoadAddressGot(MBB, MBBI, NextMBBI, /*Large=*/true); + case LoongArch::PseudoLA_TLS_LE: + return expandLoadAddressTLSLE(MBB, MBBI, NextMBBI); + case LoongArch::PseudoLA_TLS_IE: + return expandLoadAddressTLSIE(MBB, MBBI, NextMBBI); +- case LoongArch::PseudoLA_TLS_IE_LARGE: +- 
return expandLoadAddressTLSIE(MBB, MBBI, NextMBBI, /*Large=*/true); + case LoongArch::PseudoLA_TLS_LD: + return expandLoadAddressTLSLD(MBB, MBBI, NextMBBI); +- case LoongArch::PseudoLA_TLS_LD_LARGE: +- return expandLoadAddressTLSLD(MBB, MBBI, NextMBBI, /*Large=*/true); + case LoongArch::PseudoLA_TLS_GD: + return expandLoadAddressTLSGD(MBB, MBBI, NextMBBI); +- case LoongArch::PseudoLA_TLS_GD_LARGE: +- return expandLoadAddressTLSGD(MBB, MBBI, NextMBBI, /*Large=*/true); +- case LoongArch::PseudoCALL: +- return expandFunctionCALL(MBB, MBBI, NextMBBI, /*IsTailCall=*/false); +- case LoongArch::PseudoTAIL: +- return expandFunctionCALL(MBB, MBBI, NextMBBI, /*IsTailCall=*/true); + } + return false; + } +@@ -187,118 +154,9 @@ bool LoongArchPreRAExpandPseudo::expandPcalau12iInstPair( + return true; + } + +-bool LoongArchPreRAExpandPseudo::expandLargeAddressLoad( +- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, +- MachineBasicBlock::iterator &NextMBBI, unsigned LastOpcode, +- unsigned IdentifyingMO) { +- MachineInstr &MI = *MBBI; +- return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LastOpcode, IdentifyingMO, +- MI.getOperand(2), MI.getOperand(0).getReg(), +- true); +-} +- +-bool LoongArchPreRAExpandPseudo::expandLargeAddressLoad( +- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, +- MachineBasicBlock::iterator &NextMBBI, unsigned LastOpcode, +- unsigned IdentifyingMO, const MachineOperand &Symbol, Register DestReg, +- bool EraseFromParent) { +- // Code Sequence: +- // +- // Part1: pcalau12i $scratch, %MO1(sym) +- // Part0: addi.d $dest, $zero, %MO0(sym) +- // Part2: lu32i.d $dest, %MO2(sym) +- // Part3: lu52i.d $dest, $dest, %MO3(sym) +- // Fin: LastOpcode $dest, $dest, $scratch +- +- unsigned MO0, MO1, MO2, MO3; +- switch (IdentifyingMO) { +- default: +- llvm_unreachable("unsupported identifying MO"); +- case LoongArchII::MO_PCREL_LO: +- MO0 = IdentifyingMO; +- MO1 = LoongArchII::MO_PCREL_HI; +- MO2 = LoongArchII::MO_PCREL64_LO; +- MO3 = LoongArchII::MO_PCREL64_HI; +- break; +- case LoongArchII::MO_GOT_PC_HI: +- case LoongArchII::MO_LD_PC_HI: +- case LoongArchII::MO_GD_PC_HI: +- // These cases relocate just like the GOT case, except for Part1. +- MO0 = LoongArchII::MO_GOT_PC_LO; +- MO1 = IdentifyingMO; +- MO2 = LoongArchII::MO_GOT_PC64_LO; +- MO3 = LoongArchII::MO_GOT_PC64_HI; +- break; +- case LoongArchII::MO_IE_PC_LO: +- MO0 = IdentifyingMO; +- MO1 = LoongArchII::MO_IE_PC_HI; +- MO2 = LoongArchII::MO_IE_PC64_LO; +- MO3 = LoongArchII::MO_IE_PC64_HI; +- break; +- } +- +- MachineFunction *MF = MBB.getParent(); +- MachineInstr &MI = *MBBI; +- DebugLoc DL = MI.getDebugLoc(); +- +- assert(MF->getSubtarget().is64Bit() && +- "Large code model requires LA64"); +- +- Register TmpPart1 = +- MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass); +- Register TmpPart0 = +- DestReg.isVirtual() +- ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass) +- : DestReg; +- Register TmpParts02 = +- DestReg.isVirtual() +- ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass) +- : DestReg; +- Register TmpParts023 = +- DestReg.isVirtual() +- ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass) +- : DestReg; +- +- auto Part1 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), TmpPart1); +- auto Part0 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::ADDI_D), TmpPart0) +- .addReg(LoongArch::R0); +- auto Part2 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU32I_D), TmpParts02) +- // "rj" is needed due to InstrInfo pattern requirement. 
+- .addReg(TmpPart0, RegState::Kill); +- auto Part3 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU52I_D), TmpParts023) +- .addReg(TmpParts02, RegState::Kill); +- BuildMI(MBB, MBBI, DL, TII->get(LastOpcode), DestReg) +- .addReg(TmpParts023) +- .addReg(TmpPart1, RegState::Kill); +- +- if (Symbol.getType() == MachineOperand::MO_ExternalSymbol) { +- const char *SymName = Symbol.getSymbolName(); +- Part0.addExternalSymbol(SymName, MO0); +- Part1.addExternalSymbol(SymName, MO1); +- Part2.addExternalSymbol(SymName, MO2); +- Part3.addExternalSymbol(SymName, MO3); +- } else { +- Part0.addDisp(Symbol, 0, MO0); +- Part1.addDisp(Symbol, 0, MO1); +- Part2.addDisp(Symbol, 0, MO2); +- Part3.addDisp(Symbol, 0, MO3); +- } +- +- if (EraseFromParent) +- MI.eraseFromParent(); +- +- return true; +-} +- + bool LoongArchPreRAExpandPseudo::expandLoadAddressPcrel( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, +- MachineBasicBlock::iterator &NextMBBI, bool Large) { +- if (Large) +- // Emit the 5-insn large address load sequence with the `%pc` family of +- // relocs. +- return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D, +- LoongArchII::MO_PCREL_LO); +- ++ MachineBasicBlock::iterator &NextMBBI) { + // Code Sequence: + // pcalau12i $rd, %pc_hi20(sym) + // addi.w/d $rd, $rd, %pc_lo12(sym) +@@ -311,13 +169,7 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressPcrel( + + bool LoongArchPreRAExpandPseudo::expandLoadAddressGot( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, +- MachineBasicBlock::iterator &NextMBBI, bool Large) { +- if (Large) +- // Emit the 5-insn large address load sequence with the `%got_pc` family +- // of relocs, loading the result from GOT with `ldx.d` in the end. +- return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::LDX_D, +- LoongArchII::MO_GOT_PC_HI); +- ++ MachineBasicBlock::iterator &NextMBBI) { + // Code Sequence: + // pcalau12i $rd, %got_pc_hi20(sym) + // ld.w/d $rd, $rd, %got_pc_lo12(sym) +@@ -378,13 +230,7 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSLE( + + bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSIE( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, +- MachineBasicBlock::iterator &NextMBBI, bool Large) { +- if (Large) +- // Emit the 5-insn large address load sequence with the `%ie_pc` family +- // of relocs, loading the result with `ldx.d` in the end. +- return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::LDX_D, +- LoongArchII::MO_IE_PC_LO); +- ++ MachineBasicBlock::iterator &NextMBBI) { + // Code Sequence: + // pcalau12i $rd, %ie_pc_hi20(sym) + // ld.w/d $rd, $rd, %ie_pc_lo12(sym) +@@ -397,13 +243,7 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSIE( + + bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSLD( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, +- MachineBasicBlock::iterator &NextMBBI, bool Large) { +- if (Large) +- // Emit the 5-insn large address load sequence with the `%got_pc` family +- // of relocs, with the `pcalau12i` insn relocated with `%ld_pc_hi20`. 
+- return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D, +- LoongArchII::MO_LD_PC_HI); +- ++ MachineBasicBlock::iterator &NextMBBI) { + // Code Sequence: + // pcalau12i $rd, %ld_pc_hi20(sym) + // addi.w/d $rd, $rd, %got_pc_lo12(sym) +@@ -416,13 +256,7 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSLD( + + bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSGD( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, +- MachineBasicBlock::iterator &NextMBBI, bool Large) { +- if (Large) +- // Emit the 5-insn large address load sequence with the `%got_pc` family +- // of relocs, with the `pcalau12i` insn relocated with `%gd_pc_hi20`. +- return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D, +- LoongArchII::MO_GD_PC_HI); +- ++ MachineBasicBlock::iterator &NextMBBI) { + // Code Sequence: + // pcalau12i $rd, %gd_pc_hi20(sym) + // addi.w/d $rd, $rd, %got_pc_lo12(sym) +@@ -433,85 +267,6 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSGD( + SecondOpcode, LoongArchII::MO_GOT_PC_LO); + } + +-bool LoongArchPreRAExpandPseudo::expandFunctionCALL( +- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, +- MachineBasicBlock::iterator &NextMBBI, bool IsTailCall) { +- MachineFunction *MF = MBB.getParent(); +- MachineInstr &MI = *MBBI; +- DebugLoc DL = MI.getDebugLoc(); +- const MachineOperand &Func = MI.getOperand(0); +- MachineInstrBuilder CALL; +- unsigned Opcode; +- +- switch (MF->getTarget().getCodeModel()) { +- default: +- report_fatal_error("Unsupported code model"); +- break; +- case CodeModel::Small: { +- // CALL: +- // bl func +- // TAIL: +- // b func +- Opcode = IsTailCall ? LoongArch::PseudoB_TAIL : LoongArch::BL; +- CALL = BuildMI(MBB, MBBI, DL, TII->get(Opcode)).add(Func); +- break; +- } +- case CodeModel::Medium: { +- // CALL: +- // pcaddu18i $ra, %call36(func) +- // jirl $ra, $ra, 0 +- // TAIL: +- // pcaddu18i $scratch, %call36(func) +- // jirl $r0, $scratch, 0 +- Opcode = +- IsTailCall ? LoongArch::PseudoJIRL_TAIL : LoongArch::PseudoJIRL_CALL; +- Register ScratchReg = +- IsTailCall +- ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass) +- : LoongArch::R1; +- MachineInstrBuilder MIB = +- BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCADDU18I), ScratchReg); +- +- CALL = +- BuildMI(MBB, MBBI, DL, TII->get(Opcode)).addReg(ScratchReg).addImm(0); +- +- if (Func.isSymbol()) +- MIB.addExternalSymbol(Func.getSymbolName(), LoongArchII::MO_CALL36); +- else +- MIB.addDisp(Func, 0, LoongArchII::MO_CALL36); +- break; +- } +- case CodeModel::Large: { +- // Emit the 5-insn large address load sequence, either directly or +- // indirectly in case of going through the GOT, then JIRL_TAIL or +- // JIRL_CALL to $addr. +- Opcode = +- IsTailCall ? LoongArch::PseudoJIRL_TAIL : LoongArch::PseudoJIRL_CALL; +- Register AddrReg = +- IsTailCall +- ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass) +- : LoongArch::R1; +- +- bool UseGOT = Func.isGlobal() && !Func.getGlobal()->isDSOLocal(); +- unsigned MO = UseGOT ? LoongArchII::MO_GOT_PC_HI : LoongArchII::MO_PCREL_LO; +- unsigned LAOpcode = UseGOT ? LoongArch::LDX_D : LoongArch::ADD_D; +- expandLargeAddressLoad(MBB, MBBI, NextMBBI, LAOpcode, MO, Func, AddrReg, +- false); +- CALL = BuildMI(MBB, MBBI, DL, TII->get(Opcode)).addReg(AddrReg).addImm(0); +- break; +- } +- } +- +- // Transfer implicit operands. +- CALL.copyImplicitOps(MI); +- +- // Transfer MI flags. 
+- CALL.setMIFlags(MI.getFlags()); +- +- MI.eraseFromParent(); +- return true; +-} +- + class LoongArchExpandPseudo : public MachineFunctionPass { + public: + const LoongArchInstrInfo *TII; +@@ -533,6 +288,35 @@ private: + MachineBasicBlock::iterator &NextMBBI); + bool expandCopyCFR(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); ++ bool expandLargeAddressLoad(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI, ++ unsigned LastOpcode, unsigned IdentifyingMO); ++ bool expandLargeAddressLoad(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI, ++ unsigned LastOpcode, unsigned IdentifyingMO, ++ const MachineOperand &Symbol, Register DestReg, ++ bool EraseFromParent); ++ bool expandLoadAddressPcrelLarge(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI); ++ bool expandLoadAddressGotLarge(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI); ++ bool expandLoadAddressTLSIELarge(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI); ++ bool expandLoadAddressTLSLDLarge(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI); ++ bool expandLoadAddressTLSGDLarge(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI); ++ bool expandFunctionCALL(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI, ++ bool IsTailCall); + }; + + char LoongArchExpandPseudo::ID = 0; +@@ -567,6 +351,24 @@ bool LoongArchExpandPseudo::expandMI(MachineBasicBlock &MBB, + switch (MBBI->getOpcode()) { + case LoongArch::PseudoCopyCFR: + return expandCopyCFR(MBB, MBBI, NextMBBI); ++ case LoongArch::PseudoLA_PCREL_LARGE: ++ return expandLoadAddressPcrelLarge(MBB, MBBI, NextMBBI); ++ case LoongArch::PseudoLA_GOT_LARGE: ++ return expandLoadAddressGotLarge(MBB, MBBI, NextMBBI); ++ case LoongArch::PseudoLA_TLS_IE_LARGE: ++ return expandLoadAddressTLSIELarge(MBB, MBBI, NextMBBI); ++ case LoongArch::PseudoLA_TLS_LD_LARGE: ++ return expandLoadAddressTLSLDLarge(MBB, MBBI, NextMBBI); ++ case LoongArch::PseudoLA_TLS_GD_LARGE: ++ return expandLoadAddressTLSGDLarge(MBB, MBBI, NextMBBI); ++ case LoongArch::PseudoCALL: ++ case LoongArch::PseudoCALL_MEDIUM: ++ case LoongArch::PseudoCALL_LARGE: ++ return expandFunctionCALL(MBB, MBBI, NextMBBI, /*IsTailCall=*/false); ++ case LoongArch::PseudoTAIL: ++ case LoongArch::PseudoTAIL_MEDIUM: ++ case LoongArch::PseudoTAIL_LARGE: ++ return expandFunctionCALL(MBB, MBBI, NextMBBI, /*IsTailCall=*/true); + } + + return false; +@@ -625,6 +427,213 @@ bool LoongArchExpandPseudo::expandCopyCFR( + return true; + } + ++bool LoongArchExpandPseudo::expandLargeAddressLoad( ++ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI, unsigned LastOpcode, ++ unsigned IdentifyingMO) { ++ MachineInstr &MI = *MBBI; ++ return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LastOpcode, IdentifyingMO, ++ MI.getOperand(2), MI.getOperand(0).getReg(), ++ true); ++} ++ ++bool LoongArchExpandPseudo::expandLargeAddressLoad( ++ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI, unsigned LastOpcode, ++ unsigned IdentifyingMO, const MachineOperand &Symbol, Register DestReg, ++ bool EraseFromParent) { ++ // Code 
Sequence: ++ // ++ // Part1: pcalau12i $dst, %MO1(sym) ++ // Part0: addi.d $t8, $zero, %MO0(sym) ++ // Part2: lu32i.d $t8, %MO2(sym) ++ // Part3: lu52i.d $t8, $t8, %MO3(sym) ++ // Fin: LastOpcode $dst, $t8, $dst ++ ++ unsigned MO0, MO1, MO2, MO3; ++ switch (IdentifyingMO) { ++ default: ++ llvm_unreachable("unsupported identifying MO"); ++ case LoongArchII::MO_PCREL_LO: ++ MO0 = IdentifyingMO; ++ MO1 = LoongArchII::MO_PCREL_HI; ++ MO2 = LoongArchII::MO_PCREL64_LO; ++ MO3 = LoongArchII::MO_PCREL64_HI; ++ break; ++ case LoongArchII::MO_GOT_PC_HI: ++ case LoongArchII::MO_LD_PC_HI: ++ case LoongArchII::MO_GD_PC_HI: ++ // These cases relocate just like the GOT case, except for Part1. ++ MO0 = LoongArchII::MO_GOT_PC_LO; ++ MO1 = IdentifyingMO; ++ MO2 = LoongArchII::MO_GOT_PC64_LO; ++ MO3 = LoongArchII::MO_GOT_PC64_HI; ++ break; ++ case LoongArchII::MO_IE_PC_LO: ++ MO0 = IdentifyingMO; ++ MO1 = LoongArchII::MO_IE_PC_HI; ++ MO2 = LoongArchII::MO_IE_PC64_LO; ++ MO3 = LoongArchII::MO_IE_PC64_HI; ++ break; ++ } ++ ++ MachineFunction *MF = MBB.getParent(); ++ MachineInstr &MI = *MBBI; ++ DebugLoc DL = MI.getDebugLoc(); ++ Register ScratchReg = LoongArch::R20; // $t8 ++ ++ assert(MF->getSubtarget().is64Bit() && ++ "Large code model requires LA64"); ++ ++ auto Part1 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), DestReg); ++ auto Part0 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::ADDI_D), ScratchReg) ++ .addReg(LoongArch::R0); ++ auto Part2 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU32I_D), ScratchReg) ++ // "rj" is needed due to InstrInfo pattern requirement. ++ .addReg(ScratchReg); ++ auto Part3 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU52I_D), ScratchReg) ++ .addReg(ScratchReg); ++ BuildMI(MBB, MBBI, DL, TII->get(LastOpcode), DestReg) ++ .addReg(ScratchReg) ++ .addReg(DestReg); ++ ++ if (Symbol.getType() == MachineOperand::MO_ExternalSymbol) { ++ const char *SymName = Symbol.getSymbolName(); ++ Part0.addExternalSymbol(SymName, MO0); ++ Part1.addExternalSymbol(SymName, MO1); ++ Part2.addExternalSymbol(SymName, MO2); ++ Part3.addExternalSymbol(SymName, MO3); ++ } else { ++ Part0.addDisp(Symbol, 0, MO0); ++ Part1.addDisp(Symbol, 0, MO1); ++ Part2.addDisp(Symbol, 0, MO2); ++ Part3.addDisp(Symbol, 0, MO3); ++ } ++ ++ if (EraseFromParent) ++ MI.eraseFromParent(); ++ ++ return true; ++} ++ ++bool LoongArchExpandPseudo::expandLoadAddressPcrelLarge( ++ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI) { ++ // Emit the 5-insn large address load sequence with the `%pc` family of ++ // relocs. ++ return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D, ++ LoongArchII::MO_PCREL_LO); ++} ++ ++bool LoongArchExpandPseudo::expandLoadAddressGotLarge( ++ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI) { ++ // Emit the 5-insn large address load sequence with the `%got_pc` family ++ // of relocs, loading the result from GOT with `ldx.d` in the end. ++ return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::LDX_D, ++ LoongArchII::MO_GOT_PC_HI); ++} ++ ++bool LoongArchExpandPseudo::expandLoadAddressTLSIELarge( ++ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI) { ++ // Emit the 5-insn large address load sequence with the `%ie_pc` family ++ // of relocs, loading the result with `ldx.d` in the end. 
++ return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::LDX_D, ++ LoongArchII::MO_IE_PC_LO); ++} ++ ++bool LoongArchExpandPseudo::expandLoadAddressTLSLDLarge( ++ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI) { ++ // Emit the 5-insn large address load sequence with the `%got_pc` family ++ // of relocs, with the `pcalau12i` insn relocated with `%ld_pc_hi20`. ++ return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D, ++ LoongArchII::MO_LD_PC_HI); ++} ++ ++bool LoongArchExpandPseudo::expandLoadAddressTLSGDLarge( ++ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI) { ++ // Emit the 5-insn large address load sequence with the `%got_pc` family ++ // of relocs, with the `pcalau12i` insn relocated with `%gd_pc_hi20`. ++ return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D, ++ LoongArchII::MO_GD_PC_HI); ++} ++ ++bool LoongArchExpandPseudo::expandFunctionCALL( ++ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI, bool IsTailCall) { ++ MachineFunction *MF = MBB.getParent(); ++ MachineInstr &MI = *MBBI; ++ DebugLoc DL = MI.getDebugLoc(); ++ const MachineOperand &Func = MI.getOperand(0); ++ MachineInstrBuilder CALL; ++ unsigned Opcode; ++ ++ switch (MF->getTarget().getCodeModel()) { ++ default: ++ report_fatal_error("Unsupported code model"); ++ break; ++ case CodeModel::Small: { ++ // CALL: ++ // bl func ++ // TAIL: ++ // b func ++ Opcode = IsTailCall ? LoongArch::PseudoB_TAIL : LoongArch::BL; ++ CALL = BuildMI(MBB, MBBI, DL, TII->get(Opcode)).add(Func); ++ break; ++ } ++ case CodeModel::Medium: { ++ // CALL: ++ // pcaddu18i $ra, %call36(func) ++ // jirl $ra, $ra, 0 ++ // TAIL: ++ // pcaddu18i $t8, %call36(func) ++ // jr $t8 ++ Opcode = ++ IsTailCall ? LoongArch::PseudoJIRL_TAIL : LoongArch::PseudoJIRL_CALL; ++ Register ScratchReg = IsTailCall ? LoongArch::R20 : LoongArch::R1; ++ MachineInstrBuilder MIB = ++ BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCADDU18I), ScratchReg); ++ ++ CALL = ++ BuildMI(MBB, MBBI, DL, TII->get(Opcode)).addReg(ScratchReg).addImm(0); ++ ++ if (Func.isSymbol()) ++ MIB.addExternalSymbol(Func.getSymbolName(), LoongArchII::MO_CALL36); ++ else ++ MIB.addDisp(Func, 0, LoongArchII::MO_CALL36); ++ break; ++ } ++ case CodeModel::Large: { ++ // Emit the 5-insn large address load sequence, either directly or ++ // indirectly in case of going through the GOT, then JIRL_TAIL or ++ // JIRL_CALL to $addr. ++ Opcode = ++ IsTailCall ? LoongArch::PseudoJIRL_TAIL : LoongArch::PseudoJIRL_CALL; ++ Register AddrReg = IsTailCall ? LoongArch::R19 : LoongArch::R1; ++ ++ bool UseGOT = Func.isGlobal() && !Func.getGlobal()->isDSOLocal(); ++ unsigned MO = UseGOT ? LoongArchII::MO_GOT_PC_HI : LoongArchII::MO_PCREL_LO; ++ unsigned LAOpcode = UseGOT ? LoongArch::LDX_D : LoongArch::ADD_D; ++ expandLargeAddressLoad(MBB, MBBI, NextMBBI, LAOpcode, MO, Func, AddrReg, ++ false); ++ CALL = BuildMI(MBB, MBBI, DL, TII->get(Opcode)).addReg(AddrReg).addImm(0); ++ break; ++ } ++ } ++ ++ // Transfer implicit operands. ++ CALL.copyImplicitOps(MI); ++ ++ // Transfer MI flags. 
++ CALL.setMIFlags(MI.getFlags()); ++ ++ MI.eraseFromParent(); ++ return true; ++} ++ + } // end namespace + + INITIALIZE_PASS(LoongArchPreRAExpandPseudo, "loongarch-prera-expand-pseudo", +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index 4fc2b4709840..df1b17649b7d 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -3389,8 +3389,12 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { + + // TODO: Add more target-dependent nodes later. + NODE_NAME_CASE(CALL) ++ NODE_NAME_CASE(CALL_MEDIUM) ++ NODE_NAME_CASE(CALL_LARGE) + NODE_NAME_CASE(RET) + NODE_NAME_CASE(TAIL) ++ NODE_NAME_CASE(TAIL_MEDIUM) ++ NODE_NAME_CASE(TAIL_LARGE) + NODE_NAME_CASE(SLL_W) + NODE_NAME_CASE(SRA_W) + NODE_NAME_CASE(SRL_W) +@@ -4248,15 +4252,31 @@ LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, + + // Emit the call. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); ++ unsigned Op; ++ switch (DAG.getTarget().getCodeModel()) { ++ default: ++ report_fatal_error("Unsupported code model"); ++ case CodeModel::Small: ++ Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL; ++ break; ++ case CodeModel::Medium: ++ assert(Subtarget.is64Bit() && "Medium code model requires LA64"); ++ Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM; ++ break; ++ case CodeModel::Large: ++ assert(Subtarget.is64Bit() && "Large code model requires LA64"); ++ Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE; ++ break; ++ } + + if (IsTailCall) { + MF.getFrameInfo().setHasTailCall(); +- SDValue Ret = DAG.getNode(LoongArchISD::TAIL, DL, NodeTys, Ops); ++ SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops); + DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge); + return Ret; + } + +- Chain = DAG.getNode(LoongArchISD::CALL, DL, NodeTys, Ops); ++ Chain = DAG.getNode(Op, DL, NodeTys, Ops); + DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); + Glue = Chain.getValue(1); + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +index 2c9826a13237..a2ed149f4bb7 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +@@ -28,8 +28,12 @@ enum NodeType : unsigned { + + // TODO: add more LoongArchISDs + CALL, ++ CALL_MEDIUM, ++ CALL_LARGE, + RET, + TAIL, ++ TAIL_MEDIUM, ++ TAIL_LARGE, + + // 32-bit shifts, directly matching the semantics of the named LoongArch + // instructions. 
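Editor's note on the large code-model sequence expanded above: the scratch-register half (the addi.d/lu32i.d/lu52i.d chain into $t8 that feeds the final ADD_D or LDX_D) can be modeled in isolation. The sketch below is a standalone illustration and not part of the patch; the function names and field values are made up, only the documented semantics of the three instructions are applied, and the carry/rounding adjustments the linker folds into the real relocation values are not modeled. It shows which bit ranges of $t8 the %*_lo12, %*64_lo20 and %*64_hi12 fields occupy, while the %*_hi20 part is materialized separately into $dst by pcalau12i and combined by the final ADD_D (or LDX_D for the GOT/IE cases).

#include <cstdint>
#include <cstdio>

// Sign-extend the low `bits` bits of `v` to 64 bits.
static int64_t signExt(uint64_t v, unsigned bits) {
  return static_cast<int64_t>(v << (64 - bits)) >> (64 - bits);
}

// Model of the three scratch-register instructions, assuming:
//   addi.d  t8, zero, lo12  -> t8 = SignExt64(lo12)
//   lu32i.d t8, lo20        -> t8[63:32] = SignExt32(lo20), t8[31:0] kept
//   lu52i.d t8, t8, hi12    -> t8[63:52] = hi12,            t8[51:0] kept
static uint64_t buildScratch(uint64_t lo12, uint64_t lo20, uint64_t hi12) {
  uint64_t t8 = static_cast<uint64_t>(signExt(lo12, 12));           // addi.d
  t8 = (static_cast<uint64_t>(signExt(lo20, 20)) << 32) |           // lu32i.d
       (t8 & 0xffffffffu);
  t8 = (hi12 << 52) | (t8 & 0x000fffffffffffffULL);                 // lu52i.d
  return t8;
}

int main() {
  // Hypothetical relocation field values; a real link derives them from sym - PC.
  printf("t8 = 0x%016llx\n",
         (unsigned long long)buildScratch(0x123, 0x4567, 0x89a));
}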
+diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +index 67de5f7afd78..ecd0c2b71b85 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +@@ -69,6 +69,18 @@ def loongarch_ret : SDNode<"LoongArchISD::RET", SDTNone, + def loongarch_tail : SDNode<"LoongArchISD::TAIL", SDT_LoongArchCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; ++def loongarch_call_medium : SDNode<"LoongArchISD::CALL_MEDIUM", SDT_LoongArchCall, ++ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, ++ SDNPVariadic]>; ++def loongarch_tail_medium : SDNode<"LoongArchISD::TAIL_MEDIUM", SDT_LoongArchCall, ++ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, ++ SDNPVariadic]>; ++def loongarch_call_large : SDNode<"LoongArchISD::CALL_LARGE", SDT_LoongArchCall, ++ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, ++ SDNPVariadic]>; ++def loongarch_tail_large : SDNode<"LoongArchISD::TAIL_LARGE", SDT_LoongArchCall, ++ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, ++ SDNPVariadic]>; + def loongarch_sll_w : SDNode<"LoongArchISD::SLL_W", SDT_LoongArchIntBinOpW>; + def loongarch_sra_w : SDNode<"LoongArchISD::SRA_W", SDT_LoongArchIntBinOpW>; + def loongarch_srl_w : SDNode<"LoongArchISD::SRL_W", SDT_LoongArchIntBinOpW>; +@@ -1327,16 +1339,43 @@ def : Pat<(brind GPR:$rj), (PseudoBRIND GPR:$rj, 0)>; + def : Pat<(brind (add GPR:$rj, simm16_lsl2:$imm16)), + (PseudoBRIND GPR:$rj, simm16_lsl2:$imm16)>; + ++// Function call with 'Small' code model. + let isCall = 1, Defs = [R1] in + def PseudoCALL : Pseudo<(outs), (ins bare_symbol:$func)>; + + def : Pat<(loongarch_call tglobaladdr:$func), (PseudoCALL tglobaladdr:$func)>; + def : Pat<(loongarch_call texternalsym:$func), (PseudoCALL texternalsym:$func)>; + ++// Function call with 'Medium' code model. ++let isCall = 1, Defs = [R1, R20], Size = 8 in ++def PseudoCALL_MEDIUM : Pseudo<(outs), (ins bare_symbol:$func)>; ++ ++let Predicates = [IsLA64] in { ++def : Pat<(loongarch_call_medium tglobaladdr:$func), ++ (PseudoCALL_MEDIUM tglobaladdr:$func)>; ++def : Pat<(loongarch_call_medium texternalsym:$func), ++ (PseudoCALL_MEDIUM texternalsym:$func)>; ++} // Predicates = [IsLA64] ++ ++// Function call with 'Large' code model. ++let isCall = 1, Defs = [R1, R20], Size = 24 in ++def PseudoCALL_LARGE: Pseudo<(outs), (ins bare_symbol:$func)>; ++ ++let Predicates = [IsLA64] in { ++def : Pat<(loongarch_call_large tglobaladdr:$func), ++ (PseudoCALL_LARGE tglobaladdr:$func)>; ++def : Pat<(loongarch_call_large texternalsym:$func), ++ (PseudoCALL_LARGE texternalsym:$func)>; ++} // Predicates = [IsLA64] ++ + let isCall = 1, Defs = [R1] in + def PseudoCALLIndirect : Pseudo<(outs), (ins GPR:$rj), + [(loongarch_call GPR:$rj)]>, + PseudoInstExpansion<(JIRL R1, GPR:$rj, 0)>; ++let Predicates = [IsLA64] in { ++def : Pat<(loongarch_call_medium GPR:$rj), (PseudoCALLIndirect GPR:$rj)>; ++def : Pat<(loongarch_call_large GPR:$rj), (PseudoCALLIndirect GPR:$rj)>; ++} + + let isCall = 1, hasSideEffects = 0, mayStore = 0, mayLoad = 0, Defs = [R1] in + def PseudoJIRL_CALL : Pseudo<(outs), (ins GPR:$rj, simm16_lsl2:$imm16)>, +@@ -1347,6 +1386,7 @@ let isBarrier = 1, isReturn = 1, isTerminator = 1 in + def PseudoRET : Pseudo<(outs), (ins), [(loongarch_ret)]>, + PseudoInstExpansion<(JIRL R0, R1, 0)>; + ++// Tail call with 'Small' code model. 
+ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3] in + def PseudoTAIL : Pseudo<(outs), (ins bare_symbol:$dst)>; + +@@ -1355,10 +1395,38 @@ def : Pat<(loongarch_tail (iPTR tglobaladdr:$dst)), + def : Pat<(loongarch_tail (iPTR texternalsym:$dst)), + (PseudoTAIL texternalsym:$dst)>; + ++// Tail call with 'Medium' code model. ++let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, ++ Uses = [R3], Defs = [R20], Size = 8 in ++def PseudoTAIL_MEDIUM : Pseudo<(outs), (ins bare_symbol:$dst)>; ++ ++let Predicates = [IsLA64] in { ++def : Pat<(loongarch_tail_medium (iPTR tglobaladdr:$dst)), ++ (PseudoTAIL_MEDIUM tglobaladdr:$dst)>; ++def : Pat<(loongarch_tail_medium (iPTR texternalsym:$dst)), ++ (PseudoTAIL_MEDIUM texternalsym:$dst)>; ++} // Predicates = [IsLA64] ++ ++// Tail call with 'Large' code model. ++let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, ++ Uses = [R3], Defs = [R19, R20], Size = 24 in ++def PseudoTAIL_LARGE : Pseudo<(outs), (ins bare_symbol:$dst)>; ++ ++let Predicates = [IsLA64] in { ++def : Pat<(loongarch_tail_large (iPTR tglobaladdr:$dst)), ++ (PseudoTAIL_LARGE tglobaladdr:$dst)>; ++def : Pat<(loongarch_tail_large (iPTR texternalsym:$dst)), ++ (PseudoTAIL_LARGE texternalsym:$dst)>; ++} // Predicates = [IsLA64] ++ + let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3] in + def PseudoTAILIndirect : Pseudo<(outs), (ins GPRT:$rj), + [(loongarch_tail GPRT:$rj)]>, + PseudoInstExpansion<(JIRL R0, GPR:$rj, 0)>; ++let Predicates = [IsLA64] in { ++def : Pat<(loongarch_tail_medium GPR:$rj), (PseudoTAILIndirect GPR:$rj)>; ++def : Pat<(loongarch_tail_large GPR:$rj), (PseudoTAILIndirect GPR:$rj)>; ++} + + let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, + hasSideEffects = 0, mayStore = 0, mayLoad = 0, Uses = [R3] in +@@ -1396,6 +1464,7 @@ def PseudoLA_ABS_LARGE : Pseudo<(outs GPR:$dst), + "la.abs", "$dst, $src">; + def PseudoLA_PCREL : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], + "la.pcrel", "$dst, $src">; ++let Defs = [R20], Size = 20 in + def PseudoLA_PCREL_LARGE : Pseudo<(outs GPR:$dst), + (ins GPR:$tmp, bare_symbol:$src), [], + "la.pcrel", "$dst, $tmp, $src">, +@@ -1407,28 +1476,30 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 0, + isAsmParserOnly = 1 in { + def PseudoLA_GOT : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], + "la.got", "$dst, $src">; ++def PseudoLA_TLS_IE : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], ++ "la.tls.ie", "$dst, $src">; ++def PseudoLA_TLS_LD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], ++ "la.tls.ld", "$dst, $src">; ++def PseudoLA_TLS_GD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], ++ "la.tls.gd", "$dst, $src">; ++let Defs = [R20], Size = 20 in { + def PseudoLA_GOT_LARGE : Pseudo<(outs GPR:$dst), + (ins GPR:$tmp, bare_symbol:$src), [], + "la.got", "$dst, $tmp, $src">, + Requires<[IsLA64]>; +-def PseudoLA_TLS_IE : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], +- "la.tls.ie", "$dst, $src">; + def PseudoLA_TLS_IE_LARGE : Pseudo<(outs GPR:$dst), + (ins GPR:$tmp, bare_symbol:$src), [], + "la.tls.ie", "$dst, $tmp, $src">, + Requires<[IsLA64]>; +-def PseudoLA_TLS_LD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], +- "la.tls.ld", "$dst, $src">; + def PseudoLA_TLS_LD_LARGE : Pseudo<(outs GPR:$dst), + (ins GPR:$tmp, bare_symbol:$src), [], + "la.tls.ld", "$dst, $tmp, $src">, + Requires<[IsLA64]>; +-def PseudoLA_TLS_GD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], +- "la.tls.gd", "$dst, $src">; + def 
PseudoLA_TLS_GD_LARGE : Pseudo<(outs GPR:$dst), + (ins GPR:$tmp, bare_symbol:$src), [], + "la.tls.gd", "$dst, $tmp, $src">, + Requires<[IsLA64]>; ++} // Defs = [R20], Size = 20 + } + + // Load address inst alias: "la", "la.global" and "la.local". +diff --git a/llvm/test/CodeGen/LoongArch/code-models.ll b/llvm/test/CodeGen/LoongArch/code-models.ll +index 7c6f46d5e926..f93c31670928 100644 +--- a/llvm/test/CodeGen/LoongArch/code-models.ll ++++ b/llvm/test/CodeGen/LoongArch/code-models.ll +@@ -33,11 +33,11 @@ define i32 @call_globaladdress(i32 %a) nounwind { + ; LARGE: # %bb.0: + ; LARGE-NEXT: addi.d $sp, $sp, -16 + ; LARGE-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +-; LARGE-NEXT: pcalau12i $a1, %got_pc_hi20(callee) +-; LARGE-NEXT: addi.d $ra, $zero, %got_pc_lo12(callee) +-; LARGE-NEXT: lu32i.d $ra, %got64_pc_lo20(callee) +-; LARGE-NEXT: lu52i.d $ra, $ra, %got64_pc_hi12(callee) +-; LARGE-NEXT: ldx.d $ra, $ra, $a1 ++; LARGE-NEXT: pcalau12i $ra, %got_pc_hi20(callee) ++; LARGE-NEXT: addi.d $t8, $zero, %got_pc_lo12(callee) ++; LARGE-NEXT: lu32i.d $t8, %got64_pc_lo20(callee) ++; LARGE-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(callee) ++; LARGE-NEXT: ldx.d $ra, $t8, $ra + ; LARGE-NEXT: jirl $ra, $ra, 0 + ; LARGE-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload + ; LARGE-NEXT: addi.d $sp, $sp, 16 +@@ -82,11 +82,11 @@ define void @call_external_sym(ptr %dst) { + ; LARGE-NEXT: .cfi_offset 1, -8 + ; LARGE-NEXT: ori $a2, $zero, 1000 + ; LARGE-NEXT: move $a1, $zero +-; LARGE-NEXT: pcalau12i $a3, %pc_hi20(memset) +-; LARGE-NEXT: addi.d $ra, $zero, %pc_lo12(memset) +-; LARGE-NEXT: lu32i.d $ra, %pc64_lo20(memset) +-; LARGE-NEXT: lu52i.d $ra, $ra, %pc64_hi12(memset) +-; LARGE-NEXT: add.d $ra, $ra, $a3 ++; LARGE-NEXT: pcalau12i $ra, %pc_hi20(memset) ++; LARGE-NEXT: addi.d $t8, $zero, %pc_lo12(memset) ++; LARGE-NEXT: lu32i.d $t8, %pc64_lo20(memset) ++; LARGE-NEXT: lu52i.d $t8, $t8, %pc64_hi12(memset) ++; LARGE-NEXT: add.d $ra, $t8, $ra + ; LARGE-NEXT: jirl $ra, $ra, 0 + ; LARGE-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload + ; LARGE-NEXT: addi.d $sp, $sp, 16 +@@ -105,17 +105,17 @@ define i32 @caller_tail(i32 %i) nounwind { + ; + ; MEDIUM-LABEL: caller_tail: + ; MEDIUM: # %bb.0: # %entry +-; MEDIUM-NEXT: pcaddu18i $a1, %call36(callee_tail) +-; MEDIUM-NEXT: jr $a1 ++; MEDIUM-NEXT: pcaddu18i $t8, %call36(callee_tail) ++; MEDIUM-NEXT: jr $t8 + ; + ; LARGE-LABEL: caller_tail: + ; LARGE: # %bb.0: # %entry +-; LARGE-NEXT: pcalau12i $a1, %got_pc_hi20(callee_tail) +-; LARGE-NEXT: addi.d $a2, $zero, %got_pc_lo12(callee_tail) +-; LARGE-NEXT: lu32i.d $a2, %got64_pc_lo20(callee_tail) +-; LARGE-NEXT: lu52i.d $a2, $a2, %got64_pc_hi12(callee_tail) +-; LARGE-NEXT: ldx.d $a1, $a2, $a1 +-; LARGE-NEXT: jr $a1 ++; LARGE-NEXT: pcalau12i $t7, %got_pc_hi20(callee_tail) ++; LARGE-NEXT: addi.d $t8, $zero, %got_pc_lo12(callee_tail) ++; LARGE-NEXT: lu32i.d $t8, %got64_pc_lo20(callee_tail) ++; LARGE-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(callee_tail) ++; LARGE-NEXT: ldx.d $t7, $t8, $t7 ++; LARGE-NEXT: jr $t7 + entry: + %r = tail call i32 @callee_tail(i32 %i) + ret i32 %r +diff --git a/llvm/test/CodeGen/LoongArch/expand-call.ll b/llvm/test/CodeGen/LoongArch/expand-call.ll +index 86bf4292665b..e0d179f92de6 100644 +--- a/llvm/test/CodeGen/LoongArch/expand-call.ll ++++ b/llvm/test/CodeGen/LoongArch/expand-call.ll +@@ -1,6 +1,6 @@ + ; RUN: llc --mtriple=loongarch64 --stop-before loongarch-prera-expand-pseudo \ + ; RUN: --verify-machineinstrs < %s | FileCheck %s --check-prefix=NOEXPAND +-; RUN: llc --mtriple=loongarch64 --stop-after 
loongarch-prera-expand-pseudo \ ++; RUN: llc --mtriple=loongarch64 --stop-before machine-opt-remark-emitter \ + ; RUN: --verify-machineinstrs < %s | FileCheck %s --check-prefix=EXPAND + + declare void @callee() +diff --git a/llvm/test/CodeGen/LoongArch/global-address.ll b/llvm/test/CodeGen/LoongArch/global-address.ll +index a8f0ef648aa7..d32a17f488b1 100644 +--- a/llvm/test/CodeGen/LoongArch/global-address.ll ++++ b/llvm/test/CodeGen/LoongArch/global-address.ll +@@ -53,32 +53,32 @@ define void @foo() nounwind { + ; LA64LARGENOPIC-LABEL: foo: + ; LA64LARGENOPIC: # %bb.0: + ; LA64LARGENOPIC-NEXT: pcalau12i $a0, %got_pc_hi20(G) +-; LA64LARGENOPIC-NEXT: addi.d $a1, $zero, %got_pc_lo12(G) +-; LA64LARGENOPIC-NEXT: lu32i.d $a1, %got64_pc_lo20(G) +-; LA64LARGENOPIC-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(G) +-; LA64LARGENOPIC-NEXT: ldx.d $a0, $a1, $a0 ++; LA64LARGENOPIC-NEXT: addi.d $t8, $zero, %got_pc_lo12(G) ++; LA64LARGENOPIC-NEXT: lu32i.d $t8, %got64_pc_lo20(G) ++; LA64LARGENOPIC-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(G) ++; LA64LARGENOPIC-NEXT: ldx.d $a0, $t8, $a0 + ; LA64LARGENOPIC-NEXT: ld.w $a0, $a0, 0 + ; LA64LARGENOPIC-NEXT: pcalau12i $a0, %pc_hi20(g) +-; LA64LARGENOPIC-NEXT: addi.d $a1, $zero, %pc_lo12(g) +-; LA64LARGENOPIC-NEXT: lu32i.d $a1, %pc64_lo20(g) +-; LA64LARGENOPIC-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g) +-; LA64LARGENOPIC-NEXT: add.d $a0, $a1, $a0 ++; LA64LARGENOPIC-NEXT: addi.d $t8, $zero, %pc_lo12(g) ++; LA64LARGENOPIC-NEXT: lu32i.d $t8, %pc64_lo20(g) ++; LA64LARGENOPIC-NEXT: lu52i.d $t8, $t8, %pc64_hi12(g) ++; LA64LARGENOPIC-NEXT: add.d $a0, $t8, $a0 + ; LA64LARGENOPIC-NEXT: ld.w $a0, $a0, 0 + ; LA64LARGENOPIC-NEXT: ret + ; + ; LA64LARGEPIC-LABEL: foo: + ; LA64LARGEPIC: # %bb.0: + ; LA64LARGEPIC-NEXT: pcalau12i $a0, %got_pc_hi20(G) +-; LA64LARGEPIC-NEXT: addi.d $a1, $zero, %got_pc_lo12(G) +-; LA64LARGEPIC-NEXT: lu32i.d $a1, %got64_pc_lo20(G) +-; LA64LARGEPIC-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(G) +-; LA64LARGEPIC-NEXT: ldx.d $a0, $a1, $a0 ++; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %got_pc_lo12(G) ++; LA64LARGEPIC-NEXT: lu32i.d $t8, %got64_pc_lo20(G) ++; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(G) ++; LA64LARGEPIC-NEXT: ldx.d $a0, $t8, $a0 + ; LA64LARGEPIC-NEXT: ld.w $a0, $a0, 0 + ; LA64LARGEPIC-NEXT: pcalau12i $a0, %pc_hi20(.Lg$local) +-; LA64LARGEPIC-NEXT: addi.d $a1, $zero, %pc_lo12(.Lg$local) +-; LA64LARGEPIC-NEXT: lu32i.d $a1, %pc64_lo20(.Lg$local) +-; LA64LARGEPIC-NEXT: lu52i.d $a1, $a1, %pc64_hi12(.Lg$local) +-; LA64LARGEPIC-NEXT: add.d $a0, $a1, $a0 ++; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %pc_lo12(.Lg$local) ++; LA64LARGEPIC-NEXT: lu32i.d $t8, %pc64_lo20(.Lg$local) ++; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %pc64_hi12(.Lg$local) ++; LA64LARGEPIC-NEXT: add.d $a0, $t8, $a0 + ; LA64LARGEPIC-NEXT: ld.w $a0, $a0, 0 + ; LA64LARGEPIC-NEXT: ret + %V = load volatile i32, ptr @G +diff --git a/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll b/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll +index a515939b9c2b..474436a0126b 100644 +--- a/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll ++++ b/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll +@@ -48,13 +48,13 @@ define void @foo() nounwind { + ; MEDIUM_SCH-NEXT: addi.d $sp, $sp, -16 + ; MEDIUM_SCH-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill + ; MEDIUM_SCH-NEXT: pcalau12i $a0, %got_pc_hi20(G) +-; MEDIUM_SCH-NEXT: pcaddu18i $ra, %call36(bar) + ; MEDIUM_SCH-NEXT: ld.d $a0, $a0, %got_pc_lo12(G) + ; MEDIUM_SCH-NEXT: ld.d $a0, $a0, 0 + ; MEDIUM_SCH-NEXT: pcalau12i $a0, %pc_hi20(g) + 
; MEDIUM_SCH-NEXT: addi.d $a0, $a0, %pc_lo12(g) + ; MEDIUM_SCH-NEXT: ld.d $a0, $a0, 0 + ; MEDIUM_SCH-NEXT: ori $a0, $zero, 1 ++; MEDIUM_SCH-NEXT: pcaddu18i $ra, %call36(bar) + ; MEDIUM_SCH-NEXT: jirl $ra, $ra, 0 + ; MEDIUM_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(gd) + ; MEDIUM_SCH-NEXT: ld.d $a0, $a0, %ie_pc_lo12(gd) +@@ -74,41 +74,41 @@ define void @foo() nounwind { + ; LARGE_NO_SCH-NEXT: addi.d $sp, $sp, -16 + ; LARGE_NO_SCH-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill + ; LARGE_NO_SCH-NEXT: pcalau12i $a0, %got_pc_hi20(G) +-; LARGE_NO_SCH-NEXT: addi.d $a1, $zero, %got_pc_lo12(G) +-; LARGE_NO_SCH-NEXT: lu32i.d $a1, %got64_pc_lo20(G) +-; LARGE_NO_SCH-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(G) +-; LARGE_NO_SCH-NEXT: ldx.d $a0, $a1, $a0 ++; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %got_pc_lo12(G) ++; LARGE_NO_SCH-NEXT: lu32i.d $t8, %got64_pc_lo20(G) ++; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(G) ++; LARGE_NO_SCH-NEXT: ldx.d $a0, $t8, $a0 + ; LARGE_NO_SCH-NEXT: ld.d $a0, $a0, 0 + ; LARGE_NO_SCH-NEXT: pcalau12i $a0, %pc_hi20(g) +-; LARGE_NO_SCH-NEXT: addi.d $a1, $zero, %pc_lo12(g) +-; LARGE_NO_SCH-NEXT: lu32i.d $a1, %pc64_lo20(g) +-; LARGE_NO_SCH-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g) +-; LARGE_NO_SCH-NEXT: add.d $a0, $a1, $a0 ++; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %pc_lo12(g) ++; LARGE_NO_SCH-NEXT: lu32i.d $t8, %pc64_lo20(g) ++; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %pc64_hi12(g) ++; LARGE_NO_SCH-NEXT: add.d $a0, $t8, $a0 + ; LARGE_NO_SCH-NEXT: ld.d $a0, $a0, 0 + ; LARGE_NO_SCH-NEXT: ori $a0, $zero, 1 +-; LARGE_NO_SCH-NEXT: pcalau12i $a1, %got_pc_hi20(bar) +-; LARGE_NO_SCH-NEXT: addi.d $ra, $zero, %got_pc_lo12(bar) +-; LARGE_NO_SCH-NEXT: lu32i.d $ra, %got64_pc_lo20(bar) +-; LARGE_NO_SCH-NEXT: lu52i.d $ra, $ra, %got64_pc_hi12(bar) +-; LARGE_NO_SCH-NEXT: ldx.d $ra, $ra, $a1 ++; LARGE_NO_SCH-NEXT: pcalau12i $ra, %got_pc_hi20(bar) ++; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %got_pc_lo12(bar) ++; LARGE_NO_SCH-NEXT: lu32i.d $t8, %got64_pc_lo20(bar) ++; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(bar) ++; LARGE_NO_SCH-NEXT: ldx.d $ra, $t8, $ra + ; LARGE_NO_SCH-NEXT: jirl $ra, $ra, 0 + ; LARGE_NO_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(gd) +-; LARGE_NO_SCH-NEXT: addi.d $a1, $zero, %ie_pc_lo12(gd) +-; LARGE_NO_SCH-NEXT: lu32i.d $a1, %ie64_pc_lo20(gd) +-; LARGE_NO_SCH-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(gd) +-; LARGE_NO_SCH-NEXT: ldx.d $a0, $a1, $a0 ++; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %ie_pc_lo12(gd) ++; LARGE_NO_SCH-NEXT: lu32i.d $t8, %ie64_pc_lo20(gd) ++; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(gd) ++; LARGE_NO_SCH-NEXT: ldx.d $a0, $t8, $a0 + ; LARGE_NO_SCH-NEXT: ldx.d $a0, $a0, $tp + ; LARGE_NO_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ld) +-; LARGE_NO_SCH-NEXT: addi.d $a1, $zero, %ie_pc_lo12(ld) +-; LARGE_NO_SCH-NEXT: lu32i.d $a1, %ie64_pc_lo20(ld) +-; LARGE_NO_SCH-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(ld) +-; LARGE_NO_SCH-NEXT: ldx.d $a0, $a1, $a0 ++; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ld) ++; LARGE_NO_SCH-NEXT: lu32i.d $t8, %ie64_pc_lo20(ld) ++; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(ld) ++; LARGE_NO_SCH-NEXT: ldx.d $a0, $t8, $a0 + ; LARGE_NO_SCH-NEXT: ldx.d $a0, $a0, $tp + ; LARGE_NO_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ie) +-; LARGE_NO_SCH-NEXT: addi.d $a1, $zero, %ie_pc_lo12(ie) +-; LARGE_NO_SCH-NEXT: lu32i.d $a1, %ie64_pc_lo20(ie) +-; LARGE_NO_SCH-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(ie) +-; LARGE_NO_SCH-NEXT: ldx.d $a0, $a1, $a0 ++; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ie) ++; LARGE_NO_SCH-NEXT: lu32i.d $t8, %ie64_pc_lo20(ie) 
++; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(ie) ++; LARGE_NO_SCH-NEXT: ldx.d $a0, $t8, $a0 + ; LARGE_NO_SCH-NEXT: ldx.d $a0, $a0, $tp + ; LARGE_NO_SCH-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload + ; LARGE_NO_SCH-NEXT: addi.d $sp, $sp, 16 +@@ -118,42 +118,42 @@ define void @foo() nounwind { + ; LARGE_SCH: # %bb.0: + ; LARGE_SCH-NEXT: addi.d $sp, $sp, -16 + ; LARGE_SCH-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +-; LARGE_SCH-NEXT: addi.d $a1, $zero, %got_pc_lo12(G) + ; LARGE_SCH-NEXT: pcalau12i $a0, %got_pc_hi20(G) +-; LARGE_SCH-NEXT: addi.d $ra, $zero, %got_pc_lo12(bar) +-; LARGE_SCH-NEXT: lu32i.d $a1, %got64_pc_lo20(G) +-; LARGE_SCH-NEXT: lu32i.d $ra, %got64_pc_lo20(bar) +-; LARGE_SCH-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(G) +-; LARGE_SCH-NEXT: lu52i.d $ra, $ra, %got64_pc_hi12(bar) +-; LARGE_SCH-NEXT: ldx.d $a0, $a1, $a0 +-; LARGE_SCH-NEXT: addi.d $a1, $zero, %pc_lo12(g) +-; LARGE_SCH-NEXT: lu32i.d $a1, %pc64_lo20(g) +-; LARGE_SCH-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g) ++; LARGE_SCH-NEXT: addi.d $t8, $zero, %got_pc_lo12(G) ++; LARGE_SCH-NEXT: lu32i.d $t8, %got64_pc_lo20(G) ++; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(G) ++; LARGE_SCH-NEXT: ldx.d $a0, $t8, $a0 + ; LARGE_SCH-NEXT: ld.d $a0, $a0, 0 + ; LARGE_SCH-NEXT: pcalau12i $a0, %pc_hi20(g) +-; LARGE_SCH-NEXT: add.d $a0, $a1, $a0 +-; LARGE_SCH-NEXT: pcalau12i $a1, %got_pc_hi20(bar) ++; LARGE_SCH-NEXT: addi.d $t8, $zero, %pc_lo12(g) ++; LARGE_SCH-NEXT: lu32i.d $t8, %pc64_lo20(g) ++; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %pc64_hi12(g) ++; LARGE_SCH-NEXT: add.d $a0, $t8, $a0 + ; LARGE_SCH-NEXT: ld.d $a0, $a0, 0 +-; LARGE_SCH-NEXT: ldx.d $ra, $ra, $a1 + ; LARGE_SCH-NEXT: ori $a0, $zero, 1 ++; LARGE_SCH-NEXT: pcalau12i $ra, %got_pc_hi20(bar) ++; LARGE_SCH-NEXT: addi.d $t8, $zero, %got_pc_lo12(bar) ++; LARGE_SCH-NEXT: lu32i.d $t8, %got64_pc_lo20(bar) ++; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(bar) ++; LARGE_SCH-NEXT: ldx.d $ra, $t8, $ra + ; LARGE_SCH-NEXT: jirl $ra, $ra, 0 +-; LARGE_SCH-NEXT: addi.d $a1, $zero, %ie_pc_lo12(gd) + ; LARGE_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(gd) +-; LARGE_SCH-NEXT: lu32i.d $a1, %ie64_pc_lo20(gd) +-; LARGE_SCH-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(gd) +-; LARGE_SCH-NEXT: ldx.d $a0, $a1, $a0 +-; LARGE_SCH-NEXT: addi.d $a1, $zero, %ie_pc_lo12(ld) +-; LARGE_SCH-NEXT: lu32i.d $a1, %ie64_pc_lo20(ld) +-; LARGE_SCH-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(ld) ++; LARGE_SCH-NEXT: addi.d $t8, $zero, %ie_pc_lo12(gd) ++; LARGE_SCH-NEXT: lu32i.d $t8, %ie64_pc_lo20(gd) ++; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(gd) ++; LARGE_SCH-NEXT: ldx.d $a0, $t8, $a0 + ; LARGE_SCH-NEXT: ldx.d $a0, $a0, $tp + ; LARGE_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ld) +-; LARGE_SCH-NEXT: ldx.d $a0, $a1, $a0 +-; LARGE_SCH-NEXT: addi.d $a1, $zero, %ie_pc_lo12(ie) +-; LARGE_SCH-NEXT: lu32i.d $a1, %ie64_pc_lo20(ie) +-; LARGE_SCH-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(ie) ++; LARGE_SCH-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ld) ++; LARGE_SCH-NEXT: lu32i.d $t8, %ie64_pc_lo20(ld) ++; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(ld) ++; LARGE_SCH-NEXT: ldx.d $a0, $t8, $a0 + ; LARGE_SCH-NEXT: ldx.d $a0, $a0, $tp + ; LARGE_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ie) +-; LARGE_SCH-NEXT: ldx.d $a0, $a1, $a0 ++; LARGE_SCH-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ie) ++; LARGE_SCH-NEXT: lu32i.d $t8, %ie64_pc_lo20(ie) ++; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(ie) ++; LARGE_SCH-NEXT: ldx.d $a0, $t8, $a0 + ; LARGE_SCH-NEXT: ldx.d $a0, $a0, $tp + ; LARGE_SCH-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload + ; LARGE_SCH-NEXT: addi.d 
$sp, $sp, 16 +diff --git a/llvm/test/CodeGen/LoongArch/tls-models.ll b/llvm/test/CodeGen/LoongArch/tls-models.ll +index a2a3792a6a54..3994df1da716 100644 +--- a/llvm/test/CodeGen/LoongArch/tls-models.ll ++++ b/llvm/test/CodeGen/LoongArch/tls-models.ll +@@ -45,15 +45,15 @@ define ptr @f1() nounwind { + ; LA64LARGEPIC-NEXT: addi.d $sp, $sp, -16 + ; LA64LARGEPIC-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill + ; LA64LARGEPIC-NEXT: pcalau12i $a0, %gd_pc_hi20(unspecified) +-; LA64LARGEPIC-NEXT: addi.d $a1, $zero, %got_pc_lo12(unspecified) +-; LA64LARGEPIC-NEXT: lu32i.d $a1, %got64_pc_lo20(unspecified) +-; LA64LARGEPIC-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(unspecified) +-; LA64LARGEPIC-NEXT: add.d $a0, $a1, $a0 +-; LA64LARGEPIC-NEXT: pcalau12i $a1, %pc_hi20(__tls_get_addr) +-; LA64LARGEPIC-NEXT: addi.d $ra, $zero, %pc_lo12(__tls_get_addr) +-; LA64LARGEPIC-NEXT: lu32i.d $ra, %pc64_lo20(__tls_get_addr) +-; LA64LARGEPIC-NEXT: lu52i.d $ra, $ra, %pc64_hi12(__tls_get_addr) +-; LA64LARGEPIC-NEXT: add.d $ra, $ra, $a1 ++; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %got_pc_lo12(unspecified) ++; LA64LARGEPIC-NEXT: lu32i.d $t8, %got64_pc_lo20(unspecified) ++; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(unspecified) ++; LA64LARGEPIC-NEXT: add.d $a0, $t8, $a0 ++; LA64LARGEPIC-NEXT: pcalau12i $ra, %pc_hi20(__tls_get_addr) ++; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %pc_lo12(__tls_get_addr) ++; LA64LARGEPIC-NEXT: lu32i.d $t8, %pc64_lo20(__tls_get_addr) ++; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr) ++; LA64LARGEPIC-NEXT: add.d $ra, $t8, $ra + ; LA64LARGEPIC-NEXT: jirl $ra, $ra, 0 + ; LA64LARGEPIC-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload + ; LA64LARGEPIC-NEXT: addi.d $sp, $sp, 16 +@@ -76,10 +76,10 @@ define ptr @f1() nounwind { + ; LA64LARGENOPIC-LABEL: f1: + ; LA64LARGENOPIC: # %bb.0: # %entry + ; LA64LARGENOPIC-NEXT: pcalau12i $a0, %ie_pc_hi20(unspecified) +-; LA64LARGENOPIC-NEXT: addi.d $a1, $zero, %ie_pc_lo12(unspecified) +-; LA64LARGENOPIC-NEXT: lu32i.d $a1, %ie64_pc_lo20(unspecified) +-; LA64LARGENOPIC-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(unspecified) +-; LA64LARGENOPIC-NEXT: ldx.d $a0, $a1, $a0 ++; LA64LARGENOPIC-NEXT: addi.d $t8, $zero, %ie_pc_lo12(unspecified) ++; LA64LARGENOPIC-NEXT: lu32i.d $t8, %ie64_pc_lo20(unspecified) ++; LA64LARGENOPIC-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(unspecified) ++; LA64LARGENOPIC-NEXT: ldx.d $a0, $t8, $a0 + ; LA64LARGENOPIC-NEXT: add.d $a0, $a0, $tp + ; LA64LARGENOPIC-NEXT: ret + entry: +@@ -116,15 +116,15 @@ define ptr @f2() nounwind { + ; LA64LARGEPIC-NEXT: addi.d $sp, $sp, -16 + ; LA64LARGEPIC-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill + ; LA64LARGEPIC-NEXT: pcalau12i $a0, %ld_pc_hi20(ld) +-; LA64LARGEPIC-NEXT: addi.d $a1, $zero, %got_pc_lo12(ld) +-; LA64LARGEPIC-NEXT: lu32i.d $a1, %got64_pc_lo20(ld) +-; LA64LARGEPIC-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(ld) +-; LA64LARGEPIC-NEXT: add.d $a0, $a1, $a0 +-; LA64LARGEPIC-NEXT: pcalau12i $a1, %pc_hi20(__tls_get_addr) +-; LA64LARGEPIC-NEXT: addi.d $ra, $zero, %pc_lo12(__tls_get_addr) +-; LA64LARGEPIC-NEXT: lu32i.d $ra, %pc64_lo20(__tls_get_addr) +-; LA64LARGEPIC-NEXT: lu52i.d $ra, $ra, %pc64_hi12(__tls_get_addr) +-; LA64LARGEPIC-NEXT: add.d $ra, $ra, $a1 ++; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %got_pc_lo12(ld) ++; LA64LARGEPIC-NEXT: lu32i.d $t8, %got64_pc_lo20(ld) ++; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(ld) ++; LA64LARGEPIC-NEXT: add.d $a0, $t8, $a0 ++; LA64LARGEPIC-NEXT: pcalau12i $ra, %pc_hi20(__tls_get_addr) ++; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %pc_lo12(__tls_get_addr) 
++; LA64LARGEPIC-NEXT: lu32i.d $t8, %pc64_lo20(__tls_get_addr) ++; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr) ++; LA64LARGEPIC-NEXT: add.d $ra, $t8, $ra + ; LA64LARGEPIC-NEXT: jirl $ra, $ra, 0 + ; LA64LARGEPIC-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload + ; LA64LARGEPIC-NEXT: addi.d $sp, $sp, 16 +@@ -147,10 +147,10 @@ define ptr @f2() nounwind { + ; LA64LARGENOPIC-LABEL: f2: + ; LA64LARGENOPIC: # %bb.0: # %entry + ; LA64LARGENOPIC-NEXT: pcalau12i $a0, %ie_pc_hi20(ld) +-; LA64LARGENOPIC-NEXT: addi.d $a1, $zero, %ie_pc_lo12(ld) +-; LA64LARGENOPIC-NEXT: lu32i.d $a1, %ie64_pc_lo20(ld) +-; LA64LARGENOPIC-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(ld) +-; LA64LARGENOPIC-NEXT: ldx.d $a0, $a1, $a0 ++; LA64LARGENOPIC-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ld) ++; LA64LARGENOPIC-NEXT: lu32i.d $t8, %ie64_pc_lo20(ld) ++; LA64LARGENOPIC-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(ld) ++; LA64LARGENOPIC-NEXT: ldx.d $a0, $t8, $a0 + ; LA64LARGENOPIC-NEXT: add.d $a0, $a0, $tp + ; LA64LARGENOPIC-NEXT: ret + entry: +@@ -177,10 +177,10 @@ define ptr @f3() nounwind { + ; LA64LARGEPIC-LABEL: f3: + ; LA64LARGEPIC: # %bb.0: # %entry + ; LA64LARGEPIC-NEXT: pcalau12i $a0, %ie_pc_hi20(ie) +-; LA64LARGEPIC-NEXT: addi.d $a1, $zero, %ie_pc_lo12(ie) +-; LA64LARGEPIC-NEXT: lu32i.d $a1, %ie64_pc_lo20(ie) +-; LA64LARGEPIC-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(ie) +-; LA64LARGEPIC-NEXT: ldx.d $a0, $a1, $a0 ++; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ie) ++; LA64LARGEPIC-NEXT: lu32i.d $t8, %ie64_pc_lo20(ie) ++; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(ie) ++; LA64LARGEPIC-NEXT: ldx.d $a0, $t8, $a0 + ; LA64LARGEPIC-NEXT: add.d $a0, $a0, $tp + ; LA64LARGEPIC-NEXT: ret + ; +@@ -201,10 +201,10 @@ define ptr @f3() nounwind { + ; LA64LARGENOPIC-LABEL: f3: + ; LA64LARGENOPIC: # %bb.0: # %entry + ; LA64LARGENOPIC-NEXT: pcalau12i $a0, %ie_pc_hi20(ie) +-; LA64LARGENOPIC-NEXT: addi.d $a1, $zero, %ie_pc_lo12(ie) +-; LA64LARGENOPIC-NEXT: lu32i.d $a1, %ie64_pc_lo20(ie) +-; LA64LARGENOPIC-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(ie) +-; LA64LARGENOPIC-NEXT: ldx.d $a0, $a1, $a0 ++; LA64LARGENOPIC-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ie) ++; LA64LARGENOPIC-NEXT: lu32i.d $t8, %ie64_pc_lo20(ie) ++; LA64LARGENOPIC-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(ie) ++; LA64LARGENOPIC-NEXT: ldx.d $a0, $t8, $a0 + ; LA64LARGENOPIC-NEXT: add.d $a0, $a0, $tp + ; LA64LARGENOPIC-NEXT: ret + entry: +-- +2.20.1 + diff --git a/0010-LoongArch-Fix-Wunused-variable-in-LoongArchExpandPse.patch b/0010-LoongArch-Fix-Wunused-variable-in-LoongArchExpandPse.patch new file mode 100644 index 0000000..fd25015 --- /dev/null +++ b/0010-LoongArch-Fix-Wunused-variable-in-LoongArchExpandPse.patch @@ -0,0 +1,38 @@ +From d50769aa6c042aa73e27d4ccf060f674c904eaf1 Mon Sep 17 00:00:00 2001 +From: Jie Fu +Date: Fri, 5 Jan 2024 12:05:23 +0800 +Subject: [PATCH 10/23] [LoongArch] Fix -Wunused-variable in + LoongArchExpandPseudoInsts.cpp (NFC) + +llvm-project/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp:480:20: + error: unused variable 'MF' [-Werror,-Wunused-variable] + MachineFunction *MF = MBB.getParent(); + ^ +1 error generated. 
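The warning quoted above fires because `MF` is referenced only inside the assert: in an assertion-disabled (NDEBUG) build the assert expands to nothing, the variable is left unused, and -Werror promotes that to an error. A minimal standalone illustration of the problem pattern and of the fix this patch applies; the stub types below are made up for the sketch and are not LLVM's real classes.

#include <cassert>

struct SubtargetStub { bool is64Bit() const { return true; } };
struct MachineFunctionStub {
  SubtargetStub ST;
  const SubtargetStub &getSubtarget() const { return ST; }
};
struct BlockStub {
  MachineFunctionStub *getParent() const {
    static MachineFunctionStub MF;
    return &MF;
  }
};

// Problem pattern: 'MF' is only read inside the assert, so it becomes an
// unused variable whenever NDEBUG compiles the assert away.
void before(const BlockStub &MBB) {
  MachineFunctionStub *MF = MBB.getParent();
  assert(MF->getSubtarget().is64Bit() && "Large code model requires LA64");
}

// The fix: fold the call chain into the assert so no variable is left behind.
void after(const BlockStub &MBB) {
  assert(MBB.getParent()->getSubtarget().is64Bit() &&
         "Large code model requires LA64");
}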
+ +(cherry picked from commit 52d1397e38ee88b170585c9c824d08e6975890ca) +--- + llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp +index f977f176066a..ad39658f698e 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp +@@ -477,12 +477,11 @@ bool LoongArchExpandPseudo::expandLargeAddressLoad( + break; + } + +- MachineFunction *MF = MBB.getParent(); + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + Register ScratchReg = LoongArch::R20; // $t8 + +- assert(MF->getSubtarget().is64Bit() && ++ assert(MBB.getParent()->getSubtarget().is64Bit() && + "Large code model requires LA64"); + + auto Part1 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), DestReg); +-- +2.20.1 + diff --git a/0010-LoongArch-Reorder-LoongArchTargetLowering-.-NFC.patch b/0010-LoongArch-Reorder-LoongArchTargetLowering-.-NFC.patch new file mode 100644 index 0000000..5a5fdf9 --- /dev/null +++ b/0010-LoongArch-Reorder-LoongArchTargetLowering-.-NFC.patch @@ -0,0 +1,280 @@ +From 9869322d067e8cec3d9a6417b4978fde7f4e71e4 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Wed, 23 Aug 2023 15:28:00 +0800 +Subject: [PATCH 10/42] [LoongArch] Reorder LoongArchTargetLowering(). NFC + +(cherry picked from commit 3693909ca47f1fafc97b441c91f5656acdd3907c) + + +[LoongArch] Fix Subtarget.is64Bit + +(cherry picked from commit 749f36dae311000e1d69351707f4f24a72090c94) + +--- + .../LoongArch/LoongArchISelLowering.cpp | 152 ++++++++++-------- + 1 file changed, 82 insertions(+), 70 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index 3a40cd06a3eb..2f8ce57d3f5f 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -47,20 +47,14 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + : TargetLowering(TM), Subtarget(STI) { + + MVT GRLenVT = Subtarget.getGRLenVT(); ++ + // Set up the register classes. ++ + addRegisterClass(GRLenVT, &LoongArch::GPRRegClass); + if (Subtarget.hasBasicF()) + addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass); + if (Subtarget.hasBasicD()) + addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass); +- if (Subtarget.hasExtLSX()) +- for (auto VT : {MVT::v4f32, MVT::v2f64, MVT::v16i8, MVT::v8i16, MVT::v4i32, +- MVT::v2i64}) +- addRegisterClass(VT, &LoongArch::LSX128RegClass); +- if (Subtarget.hasExtLASX()) +- for (auto VT : {MVT::v8f32, MVT::v4f64, MVT::v32i8, MVT::v16i16, MVT::v8i32, +- MVT::v4i64}) +- addRegisterClass(VT, &LoongArch::LASX256RegClass); + + static const MVT::SimpleValueType LSXVTs[] = { + MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64}; +@@ -75,38 +69,57 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + for (MVT VT : LASXVTs) + addRegisterClass(VT, &LoongArch::LASX256RegClass); + ++ // Set operations for LA32 and LA64. ++ + setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT, + MVT::i1, Promote); + +- // TODO: add necessary setOperationAction calls later. 
+ setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom); + setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom); + setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom); + setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom); + setOperationAction(ISD::ROTL, GRLenVT, Expand); + setOperationAction(ISD::CTPOP, GRLenVT, Expand); +- setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); +- setOperationAction(ISD::TRAP, MVT::Other, Legal); +- setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); +- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + + setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool, +- ISD::JumpTable}, ++ ISD::JumpTable, ISD::GlobalTLSAddress}, + GRLenVT, Custom); + +- setOperationAction(ISD::GlobalTLSAddress, GRLenVT, Custom); +- +- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); +- +- setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); +- if (Subtarget.is64Bit()) +- setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom); ++ setOperationAction(ISD::EH_DWARF_CFA, GRLenVT, Custom); + + setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand); + setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand); + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand); + ++ setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); ++ setOperationAction(ISD::TRAP, MVT::Other, Legal); ++ ++ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); ++ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); ++ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); ++ ++ // Expand bitreverse.i16 with native-width bitrev and shift for now, before ++ // we get to know which of sll and revb.2h is faster. ++ setOperationAction(ISD::BITREVERSE, MVT::i8, Custom); ++ setOperationAction(ISD::BITREVERSE, GRLenVT, Legal); ++ ++ // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and ++ // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16 ++ // and i32 could still be byte-swapped relatively cheaply. ++ setOperationAction(ISD::BSWAP, MVT::i16, Custom); ++ ++ setOperationAction(ISD::BR_JT, MVT::Other, Expand); ++ setOperationAction(ISD::BR_CC, GRLenVT, Expand); ++ setOperationAction(ISD::SELECT_CC, GRLenVT, Expand); ++ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); ++ setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand); ++ ++ setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom); ++ setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand); ++ ++ // Set operations for LA64 only. 
++ + if (Subtarget.is64Bit()) { + setOperationAction(ISD::SHL, MVT::i32, Custom); + setOperationAction(ISD::SRA, MVT::i32, Custom); +@@ -117,50 +130,39 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction(ISD::ROTL, MVT::i32, Custom); + setOperationAction(ISD::CTTZ, MVT::i32, Custom); + setOperationAction(ISD::CTLZ, MVT::i32, Custom); +- setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom); +- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom); +- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); ++ setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); + setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom); + setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom); ++ setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom); +- if (Subtarget.hasBasicF() && !Subtarget.hasBasicD()) +- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); +- if (Subtarget.hasBasicF()) +- setOperationAction(ISD::FRINT, MVT::f32, Legal); +- if (Subtarget.hasBasicD()) +- setOperationAction(ISD::FRINT, MVT::f64, Legal); +- } ++ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom); + +- // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and +- // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16 +- // and i32 could still be byte-swapped relatively cheaply. +- setOperationAction(ISD::BSWAP, MVT::i16, Custom); +- if (Subtarget.is64Bit()) { ++ setOperationAction(ISD::BITREVERSE, MVT::i32, Custom); + setOperationAction(ISD::BSWAP, MVT::i32, Custom); + } + +- // Expand bitreverse.i16 with native-width bitrev and shift for now, before +- // we get to know which of sll and revb.2h is faster. +- setOperationAction(ISD::BITREVERSE, MVT::i8, Custom); +- if (Subtarget.is64Bit()) { +- setOperationAction(ISD::BITREVERSE, MVT::i32, Custom); +- setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); +- } else { +- setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); +- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); ++ // Set operations for LA32 only. ++ ++ if (!Subtarget.is64Bit()) { + setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom); + setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom); +- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); ++ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); ++ ++ // Set libcalls. ++ setLibcallName(RTLIB::MUL_I128, nullptr); + } + + static const ISD::CondCode FPCCToExpand[] = { + ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE, + ISD::SETGE, ISD::SETNE, ISD::SETGT}; + ++ // Set operations for 'F' feature. 
++ + if (Subtarget.hasBasicF()) { + setCondCodeAction(FPCCToExpand, MVT::f32, Expand); ++ + setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); + setOperationAction(ISD::BR_CC, MVT::f32, Expand); + setOperationAction(ISD::FMA, MVT::f32, Legal); +@@ -173,14 +175,30 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); + setOperationAction(ISD::FPOW, MVT::f32, Expand); + setOperationAction(ISD::FREM, MVT::f32, Expand); ++ ++ if (Subtarget.is64Bit()) ++ setOperationAction(ISD::FRINT, MVT::f32, Legal); ++ ++ if (!Subtarget.hasBasicD()) { ++ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); ++ if (Subtarget.is64Bit()) { ++ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); ++ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); ++ } ++ } + } ++ ++ // Set operations for 'D' feature. ++ + if (Subtarget.hasBasicD()) { ++ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); ++ setTruncStoreAction(MVT::f64, MVT::f32, Expand); + setCondCodeAction(FPCCToExpand, MVT::f64, Expand); ++ + setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); + setOperationAction(ISD::BR_CC, MVT::f64, Expand); + setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal); + setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal); +- setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); + setOperationAction(ISD::FMA, MVT::f64, Legal); + setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal); + setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal); +@@ -189,35 +207,35 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); + setOperationAction(ISD::FPOW, MVT::f64, Expand); + setOperationAction(ISD::FREM, MVT::f64, Expand); +- setTruncStoreAction(MVT::f64, MVT::f32, Expand); +- } +- +- setOperationAction(ISD::BR_JT, MVT::Other, Expand); + +- setOperationAction(ISD::BR_CC, GRLenVT, Expand); +- setOperationAction(ISD::SELECT_CC, GRLenVT, Expand); +- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); +- setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand); +- if (!Subtarget.is64Bit()) +- setLibcallName(RTLIB::MUL_I128, nullptr); +- +- setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom); +- setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand); +- if ((Subtarget.is64Bit() && Subtarget.hasBasicF() && +- !Subtarget.hasBasicD())) { +- setOperationAction(ISD::SINT_TO_FP, GRLenVT, Custom); +- setOperationAction(ISD::UINT_TO_FP, GRLenVT, Custom); ++ if (Subtarget.is64Bit()) ++ setOperationAction(ISD::FRINT, MVT::f64, Legal); + } + ++ // Set operations for 'LSX' feature. ++ + if (Subtarget.hasExtLSX()) + setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, + {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8}, Legal); + ++ // Set operations for 'LASX' feature. ++ + if (Subtarget.hasExtLASX()) + setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, + {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}, + Legal); + ++ // Set DAG combine for LA32 and LA64. ++ ++ setTargetDAGCombine(ISD::AND); ++ setTargetDAGCombine(ISD::OR); ++ setTargetDAGCombine(ISD::SRL); ++ ++ // Set DAG combine for 'LSX' feature. ++ ++ if (Subtarget.hasExtLSX()) ++ setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); ++ + // Compute derived properties from the register classes. 
+ computeRegisterProperties(Subtarget.getRegisterInfo()); + +@@ -235,12 +253,6 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment()); + setPrefLoopAlignment(Subtarget.getPrefLoopAlignment()); + setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment()); +- +- setTargetDAGCombine(ISD::AND); +- setTargetDAGCombine(ISD::OR); +- setTargetDAGCombine(ISD::SRL); +- if (Subtarget.hasExtLSX()) +- setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); + } + + bool LoongArchTargetLowering::isOffsetFoldingLegal( +-- +2.20.1 + diff --git a/0010-LoongArch-test-Add-some-ABI-regression-tests-for-emp.patch b/0010-LoongArch-test-Add-some-ABI-regression-tests-for-emp.patch new file mode 100644 index 0000000..a0d1d21 --- /dev/null +++ b/0010-LoongArch-test-Add-some-ABI-regression-tests-for-emp.patch @@ -0,0 +1,86 @@ +From 65807250035e1f9631e863ac6cc9af74c39d4c4f Mon Sep 17 00:00:00 2001 +From: Weining Lu +Date: Thu, 26 Oct 2023 11:50:28 +0800 +Subject: [PATCH 10/27] [LoongArch][test] Add some ABI regression tests for + empty struct. NFC + +How empty structs (not as fields of container struct) are passed in C++ +is not explicitly documented in psABI. This patch adds some tests +showing the current handing of clang. Some of the results are different +from gcc. Following patch(es) will try to fix the mismatch. + +(cherry picked from commit 8149066fa532d82ff62a0629d5a9fab6bd4da768) +--- + .../LoongArch/abi-lp64d-empty-structs.c | 53 +++++++++++++++++++ + 1 file changed, 53 insertions(+) + +diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c +index fb90bf556c19..d0daafac336e 100644 +--- a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c ++++ b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c +@@ -81,9 +81,62 @@ struct s8 test_s8(struct s8 a) { + return a; + } + ++/// Note: Below tests check how empty structs are passed while above tests check ++/// empty structs as fields of container struct are ignored when flattening ++/// structs to examine whether the container structs can be passed via FARs. 
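A quick standalone check of why C and C++ can diverge here: compiled as C++, an empty struct still occupies one byte, so it survives as a real argument at the IR level, whereas C (with the GNU zero-size extension) treats it as nothing; whether that one-byte aggregate should then be passed or ignored is exactly the psABI gap the commit message refers to, and where clang and gcc currently differ. The snippet below is illustrative only and the type names are made up.

#include <cstdio>

struct Empty {};                           // sizeof is 1 in C++, 0 in GNU C
struct S14Like { struct Inner {} s[1]; };  // mirrors the s14 case below: still 1 byte

int main() {
  printf("sizeof(Empty)   = %zu\n", sizeof(Empty));    // prints 1
  printf("sizeof(S14Like) = %zu\n", sizeof(S14Like));  // prints 1
}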
++ + // CHECK-C: define{{.*}} void @test_s9() + // CHECK-CXX: define{{.*}} i64 @_Z7test_s92s9(i64 {{.*}}) + struct s9 { struct empty e; }; + struct s9 test_s9(struct s9 a) { + return a; + } ++ ++// CHECK-C: define{{.*}} void @test_s10() ++// CHECK-CXX: define{{.*}} void @_Z8test_s103s10() ++struct s10 { }; ++struct s10 test_s10(struct s10 a) { ++ return a; ++} ++ ++// CHECK-C: define{{.*}} void @test_s11() ++// CHECK-CXX: define{{.*}} i64 @_Z8test_s113s11(i64 {{.*}}) ++struct s11 { struct { } s; }; ++struct s11 test_s11(struct s11 a) { ++ return a; ++} ++ ++// CHECK-C: define{{.*}} void @test_s12() ++// CHECK-CXX: define{{.*}} void @_Z8test_s123s12() ++struct s12 { int i[0]; }; ++struct s12 test_s12(struct s12 a) { ++ return a; ++} ++ ++// CHECK-C: define{{.*}} void @test_s13() ++// CHECK-CXX: define{{.*}} void @_Z8test_s133s13() ++struct s13 { struct { } s[0]; }; ++struct s13 test_s13(struct s13 a) { ++ return a; ++} ++ ++// CHECK-C: define{{.*}} void @test_s14() ++// CHECK-CXX: define{{.*}} i64 @_Z8test_s143s14(i64 {{.*}}) ++struct s14 { struct { } s[1]; }; ++struct s14 test_s14(struct s14 a) { ++ return a; ++} ++ ++// CHECK-C: define{{.*}} void @test_s15() ++// CHECK-CXX: define{{.*}} void @_Z8test_s153s15() ++struct s15 { int : 0; }; ++struct s15 test_s15(struct s15 a) { ++ return a; ++} ++ ++// CHECK-C: define{{.*}} void @test_s16() ++// CHECK-CXX: define{{.*}} void @_Z8test_s163s16() ++struct s16 { int : 1; }; ++struct s16 test_s16(struct s16 a) { ++ return a; ++} +-- +2.20.1 + diff --git a/0010-lld-ELF-Support-relax-R_LARCH_ALIGN-78692.patch b/0010-lld-ELF-Support-relax-R_LARCH_ALIGN-78692.patch new file mode 100644 index 0000000..1ced65b --- /dev/null +++ b/0010-lld-ELF-Support-relax-R_LARCH_ALIGN-78692.patch @@ -0,0 +1,562 @@ +From 80c56e85d742bb88533e4789c76ae2b55dc36835 Mon Sep 17 00:00:00 2001 +From: Jinyang He +Date: Tue, 6 Feb 2024 09:09:13 +0800 +Subject: [PATCH 10/14] [lld][ELF] Support relax R_LARCH_ALIGN (#78692) + +Refer to commit 6611d58f5bbc ("Relax R_RISCV_ALIGN"), we can relax +R_LARCH_ALIGN by same way. Reuse `SymbolAnchor`, `RISCVRelaxAux` and +`initSymbolAnchors` to simplify codes. As `riscvFinalizeRelax` is an +arch-specific function, put it override on `TargetInfo::finalizeRelax`, +so that LoongArch can override it, too. + +The flow of relax R_LARCH_ALIGN is almost consistent with RISCV. The +difference is that LoongArch only has 4-bytes NOP and all executable +insn is 4-bytes aligned. So LoongArch not need rewrite NOP sequence. +Alignment maxBytesEmit parameter is supported in psABI v2.30. 
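The core of the R_LARCH_ALIGN relaxation is a small bytes-to-remove computation: the assembler emits align - 4 bytes of NOPs after the alignment directive, and the linker keeps only the padding that is still needed once earlier deletions have shifted the location. The toy model below is not lld code; it mirrors the common case in the patch where the addend packs log2(alignment) in its low byte and the optional max-bytes-to-emit above it, and the function name and example address are made up.

#include <cstdint>
#include <cstdio>

static uint64_t bytesToRemove(uint64_t loc, uint64_t addend) {
  const uint64_t align = uint64_t(1) << (addend & 0xff);
  const uint64_t maxBytes = addend >> 8;          // 0 means "no limit"
  const uint64_t allBytes = align - 4;            // NOP bytes emitted by the assembler
  const uint64_t off = loc & (align - 1);
  const uint64_t curBytes = off == 0 ? 0 : align - off;  // padding still required
  if (maxBytes != 0 && curBytes > maxBytes)       // bound cannot be met:
    return allBytes;                              // drop the whole NOP sequence
  return allBytes - curBytes;                     // keep only what is needed
}

int main() {
  // A 16-byte alignment request (addend = 4) at a location that is already
  // 8-byte aligned: 12 NOP bytes were emitted, 8 are still needed, 4 go away.
  printf("%llu\n", (unsigned long long)bytesToRemove(0x120008, 4));  // prints 4
}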
+ +(cherry picked from commit 06a728f3feab876f9195738b5774e82dadc0f3a7) +(cherry picked from commit 60a8ec3a35c722a9eb8298c215321b89d0faf5b5) +--- + lld/ELF/Arch/LoongArch.cpp | 156 ++++++++++++++++++++- + lld/ELF/Arch/RISCV.cpp | 28 +--- + lld/ELF/InputSection.cpp | 5 +- + lld/ELF/InputSection.h | 24 +++- + lld/ELF/Target.h | 3 + + lld/ELF/Writer.cpp | 4 +- + lld/test/ELF/loongarch-relax-align.s | 126 +++++++++++++++++ + lld/test/ELF/loongarch-relax-emit-relocs.s | 49 +++++++ + 8 files changed, 362 insertions(+), 33 deletions(-) + create mode 100644 lld/test/ELF/loongarch-relax-align.s + create mode 100644 lld/test/ELF/loongarch-relax-emit-relocs.s + +diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp +index d3a538577a59..3f57a76873f9 100644 +--- a/lld/ELF/Arch/LoongArch.cpp ++++ b/lld/ELF/Arch/LoongArch.cpp +@@ -36,6 +36,8 @@ public: + bool usesOnlyLowPageBits(RelType type) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; ++ bool relaxOnce(int pass) const override; ++ void finalizeRelax(int passes) const override; + }; + } // end anonymous namespace + +@@ -521,8 +523,9 @@ RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s, + case R_LARCH_TLS_GD_HI20: + return R_TLSGD_GOT; + case R_LARCH_RELAX: +- // LoongArch linker relaxation is not implemented yet. +- return R_NONE; ++ return config->relax ? R_RELAX_HINT : R_NONE; ++ case R_LARCH_ALIGN: ++ return R_RELAX_HINT; + + // Other known relocs that are explicitly unimplemented: + // +@@ -696,6 +699,155 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel, + } + } + ++static bool relax(InputSection &sec) { ++ const uint64_t secAddr = sec.getVA(); ++ const MutableArrayRef relocs = sec.relocs(); ++ auto &aux = *sec.relaxAux; ++ bool changed = false; ++ ArrayRef sa = ArrayRef(aux.anchors); ++ uint64_t delta = 0; ++ ++ std::fill_n(aux.relocTypes.get(), relocs.size(), R_LARCH_NONE); ++ aux.writes.clear(); ++ for (auto [i, r] : llvm::enumerate(relocs)) { ++ const uint64_t loc = secAddr + r.offset - delta; ++ uint32_t &cur = aux.relocDeltas[i], remove = 0; ++ switch (r.type) { ++ case R_LARCH_ALIGN: { ++ const uint64_t addend = ++ r.sym->isUndefined() ? Log2_64(r.addend) + 1 : r.addend; ++ const uint64_t allBytes = (1 << (addend & 0xff)) - 4; ++ const uint64_t align = 1 << (addend & 0xff); ++ const uint64_t maxBytes = addend >> 8; ++ const uint64_t off = loc & (align - 1); ++ const uint64_t curBytes = off == 0 ? 0 : align - off; ++ // All bytes beyond the alignment boundary should be removed. ++ // If emit bytes more than max bytes to emit, remove all. ++ if (maxBytes != 0 && curBytes > maxBytes) ++ remove = allBytes; ++ else ++ remove = allBytes - curBytes; ++ // If we can't satisfy this alignment, we've found a bad input. ++ if (LLVM_UNLIKELY(static_cast(remove) < 0)) { ++ errorOrWarn(getErrorLocation((const uint8_t *)loc) + ++ "insufficient padding bytes for " + lld::toString(r.type) + ++ ": " + Twine(allBytes) + " bytes available for " + ++ "requested alignment of " + Twine(align) + " bytes"); ++ remove = 0; ++ } ++ break; ++ } ++ } ++ ++ // For all anchors whose offsets are <= r.offset, they are preceded by ++ // the previous relocation whose `relocDeltas` value equals `delta`. ++ // Decrease their st_value and update their st_size. 
++ for (; sa.size() && sa[0].offset <= r.offset; sa = sa.slice(1)) { ++ if (sa[0].end) ++ sa[0].d->size = sa[0].offset - delta - sa[0].d->value; ++ else ++ sa[0].d->value = sa[0].offset - delta; ++ } ++ delta += remove; ++ if (delta != cur) { ++ cur = delta; ++ changed = true; ++ } ++ } ++ ++ for (const SymbolAnchor &a : sa) { ++ if (a.end) ++ a.d->size = a.offset - delta - a.d->value; ++ else ++ a.d->value = a.offset - delta; ++ } ++ // Inform assignAddresses that the size has changed. ++ if (!isUInt<32>(delta)) ++ fatal("section size decrease is too large: " + Twine(delta)); ++ sec.bytesDropped = delta; ++ return changed; ++} ++ ++// When relaxing just R_LARCH_ALIGN, relocDeltas is usually changed only once in ++// the absence of a linker script. For call and load/store R_LARCH_RELAX, code ++// shrinkage may reduce displacement and make more relocations eligible for ++// relaxation. Code shrinkage may increase displacement to a call/load/store ++// target at a higher fixed address, invalidating an earlier relaxation. Any ++// change in section sizes can have cascading effect and require another ++// relaxation pass. ++bool LoongArch::relaxOnce(int pass) const { ++ if (config->relocatable) ++ return false; ++ ++ if (pass == 0) ++ initSymbolAnchors(); ++ ++ SmallVector storage; ++ bool changed = false; ++ for (OutputSection *osec : outputSections) { ++ if (!(osec->flags & SHF_EXECINSTR)) ++ continue; ++ for (InputSection *sec : getInputSections(*osec, storage)) ++ changed |= relax(*sec); ++ } ++ return changed; ++} ++ ++void LoongArch::finalizeRelax(int passes) const { ++ log("relaxation passes: " + Twine(passes)); ++ SmallVector storage; ++ for (OutputSection *osec : outputSections) { ++ if (!(osec->flags & SHF_EXECINSTR)) ++ continue; ++ for (InputSection *sec : getInputSections(*osec, storage)) { ++ RelaxAux &aux = *sec->relaxAux; ++ if (!aux.relocDeltas) ++ continue; ++ ++ MutableArrayRef rels = sec->relocs(); ++ ArrayRef old = sec->content(); ++ size_t newSize = old.size() - aux.relocDeltas[rels.size() - 1]; ++ uint8_t *p = context().bAlloc.Allocate(newSize); ++ uint64_t offset = 0; ++ int64_t delta = 0; ++ sec->content_ = p; ++ sec->size = newSize; ++ sec->bytesDropped = 0; ++ ++ // Update section content: remove NOPs for R_LARCH_ALIGN and rewrite ++ // instructions for relaxed relocations. ++ for (size_t i = 0, e = rels.size(); i != e; ++i) { ++ uint32_t remove = aux.relocDeltas[i] - delta; ++ delta = aux.relocDeltas[i]; ++ if (remove == 0 && aux.relocTypes[i] == R_LARCH_NONE) ++ continue; ++ ++ // Copy from last location to the current relocated location. ++ const Relocation &r = rels[i]; ++ uint64_t size = r.offset - offset; ++ memcpy(p, old.data() + offset, size); ++ p += size; ++ offset = r.offset + remove; ++ } ++ memcpy(p, old.data() + offset, old.size() - offset); ++ ++ // Subtract the previous relocDeltas value from the relocation offset. ++ // For a pair of R_LARCH_XXX/R_LARCH_RELAX with the same offset, decrease ++ // their r_offset by the same delta. 
++ delta = 0; ++ for (size_t i = 0, e = rels.size(); i != e;) { ++ uint64_t cur = rels[i].offset; ++ do { ++ rels[i].offset -= delta; ++ if (aux.relocTypes[i] != R_LARCH_NONE) ++ rels[i].type = aux.relocTypes[i]; ++ } while (++i != e && rels[i].offset == cur); ++ delta = aux.relocDeltas[i - 1]; ++ } ++ } ++ } ++} ++ + TargetInfo *elf::getLoongArchTargetInfo() { + static LoongArch target; + return ⌖ +diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp +index d0d75118e30d..06120cabc132 100644 +--- a/lld/ELF/Arch/RISCV.cpp ++++ b/lld/ELF/Arch/RISCV.cpp +@@ -44,6 +44,7 @@ public: + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + bool relaxOnce(int pass) const override; ++ void finalizeRelax(int passes) const override; + }; + + } // end anonymous namespace +@@ -513,33 +514,14 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { + } + } + +-namespace { +-struct SymbolAnchor { +- uint64_t offset; +- Defined *d; +- bool end; // true for the anchor of st_value+st_size +-}; +-} // namespace +- +-struct elf::RISCVRelaxAux { +- // This records symbol start and end offsets which will be adjusted according +- // to the nearest relocDeltas element. +- SmallVector anchors; +- // For relocations[i], the actual offset is r_offset - (i ? relocDeltas[i-1] : +- // 0). +- std::unique_ptr relocDeltas; +- // For relocations[i], the actual type is relocTypes[i]. +- std::unique_ptr relocTypes; +- SmallVector writes; +-}; + +-static void initSymbolAnchors() { ++void elf::initSymbolAnchors() { + SmallVector storage; + for (OutputSection *osec : outputSections) { + if (!(osec->flags & SHF_EXECINSTR)) + continue; + for (InputSection *sec : getInputSections(*osec, storage)) { +- sec->relaxAux = make(); ++ sec->relaxAux = make(); + if (sec->relocs().size()) { + sec->relaxAux->relocDeltas = + std::make_unique(sec->relocs().size()); +@@ -766,7 +748,7 @@ bool RISCV::relaxOnce(int pass) const { + return changed; + } + +-void elf::riscvFinalizeRelax(int passes) { ++void RISCV::finalizeRelax(int passes) const { + llvm::TimeTraceScope timeScope("Finalize RISC-V relaxation"); + log("relaxation passes: " + Twine(passes)); + SmallVector storage; +@@ -774,7 +756,7 @@ void elf::riscvFinalizeRelax(int passes) { + if (!(osec->flags & SHF_EXECINSTR)) + continue; + for (InputSection *sec : getInputSections(*osec, storage)) { +- RISCVRelaxAux &aux = *sec->relaxAux; ++ RelaxAux &aux = *sec->relaxAux; + if (!aux.relocDeltas) + continue; + +diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp +index 1aff6b968d86..b178d82407e3 100644 +--- a/lld/ELF/InputSection.cpp ++++ b/lld/ELF/InputSection.cpp +@@ -351,8 +351,9 @@ InputSectionBase *InputSection::getRelocatedSection() const { + + template + void InputSection::copyRelocations(uint8_t *buf) { +- if (config->relax && !config->relocatable && config->emachine == EM_RISCV) { +- // On RISC-V, relaxation might change relocations: copy from ++ if (config->relax && !config->relocatable && ++ (config->emachine == EM_RISCV || config->emachine == EM_LOONGARCH)) { ++ // On LoongArch and RISC-V, relaxation might change relocations: copy from + // internal ones that are updated by relaxation. 
+ InputSectionBase *sec = getRelocatedSection(); + copyRelocations(buf, llvm::make_range(sec->relocations.begin(), +diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h +index 2b91711abba3..842bc369909d 100644 +--- a/lld/ELF/InputSection.h ++++ b/lld/ELF/InputSection.h +@@ -101,7 +101,23 @@ protected: + link(link), info(info) {} + }; + +-struct RISCVRelaxAux; ++struct SymbolAnchor { ++ uint64_t offset; ++ Defined *d; ++ bool end; // true for the anchor of st_value+st_size ++}; ++ ++struct RelaxAux { ++ // This records symbol start and end offsets which will be adjusted according ++ // to the nearest relocDeltas element. ++ SmallVector anchors; ++ // For relocations[i], the actual offset is ++ // r_offset - (i ? relocDeltas[i-1] : 0). ++ std::unique_ptr relocDeltas; ++ // For relocations[i], the actual type is relocTypes[i]. ++ std::unique_ptr relocTypes; ++ SmallVector writes; ++}; + + // This corresponds to a section of an input file. + class InputSectionBase : public SectionBase { +@@ -222,9 +238,9 @@ public: + // basic blocks. + JumpInstrMod *jumpInstrMod = nullptr; + +- // Auxiliary information for RISC-V linker relaxation. RISC-V does not use +- // jumpInstrMod. +- RISCVRelaxAux *relaxAux; ++ // Auxiliary information for RISC-V and LoongArch linker relaxation. ++ // They do not use jumpInstrMod. ++ RelaxAux *relaxAux; + + // The compressed content size when `compressed` is true. + size_t compressedSize; +diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h +index 47dbe6b4d1c6..bf831afa1793 100644 +--- a/lld/ELF/Target.h ++++ b/lld/ELF/Target.h +@@ -94,6 +94,8 @@ public: + + // Do a linker relaxation pass and return true if we changed something. + virtual bool relaxOnce(int pass) const { return false; } ++ // Do finalize relaxation after collecting relaxation infos. 
++ virtual void finalizeRelax(int passes) const {} + + virtual void applyJumpInstrMod(uint8_t *loc, JumpModType type, + JumpModType val) const {} +@@ -234,6 +236,7 @@ void addArmInputSectionMappingSymbols(); + void addArmSyntheticSectionMappingSymbol(Defined *); + void sortArmMappingSymbols(); + void convertArmInstructionstoBE8(InputSection *sec, uint8_t *buf); ++void initSymbolAnchors(); + + LLVM_LIBRARY_VISIBILITY extern const TargetInfo *target; + TargetInfo *getTarget(); +diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp +index 368c9aabceae..dd37bbbf76c1 100644 +--- a/lld/ELF/Writer.cpp ++++ b/lld/ELF/Writer.cpp +@@ -1668,8 +1668,8 @@ template void Writer::finalizeAddressDependentContent() { + } + } + } +- if (!config->relocatable && config->emachine == EM_RISCV) +- riscvFinalizeRelax(pass); ++ if (!config->relocatable) ++ target->finalizeRelax(pass); + + if (config->relocatable) + for (OutputSection *sec : outputSections) +diff --git a/lld/test/ELF/loongarch-relax-align.s b/lld/test/ELF/loongarch-relax-align.s +new file mode 100644 +index 000000000000..ab61e15d5cac +--- /dev/null ++++ b/lld/test/ELF/loongarch-relax-align.s +@@ -0,0 +1,126 @@ ++# REQUIRES: loongarch ++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch32 --mattr=+relax %s -o %t.32.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o %t.64.o ++# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.32.o -o %t.32 ++# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.64.o -o %t.64 ++# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.32.o --no-relax -o %t.32n ++# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.64.o --no-relax -o %t.64n ++# RUN: llvm-objdump -td --no-show-raw-insn %t.32 | FileCheck %s ++# RUN: llvm-objdump -td --no-show-raw-insn %t.64 | FileCheck %s ++# RUN: llvm-objdump -td --no-show-raw-insn %t.32n | FileCheck %s ++# RUN: llvm-objdump -td --no-show-raw-insn %t.64n | FileCheck %s ++ ++## Test the R_LARCH_ALIGN without symbol index. ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o %t.o64.o --defsym=old=1 ++# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.o64.o -o %t.o64 ++# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.o64.o --no-relax -o %t.o64n ++# RUN: llvm-objdump -td --no-show-raw-insn %t.o64 | FileCheck %s ++# RUN: llvm-objdump -td --no-show-raw-insn %t.o64n | FileCheck %s ++ ++## -r keeps section contents unchanged. 
++# RUN: ld.lld -r %t.64.o -o %t.64.r ++# RUN: llvm-objdump -dr --no-show-raw-insn %t.64.r | FileCheck %s --check-prefix=CHECKR ++ ++# CHECK-DAG: {{0*}}10000 l .text {{0*}}44 .Ltext_start ++# CHECK-DAG: {{0*}}10038 l .text {{0*}}0c .L1 ++# CHECK-DAG: {{0*}}10040 l .text {{0*}}04 .L2 ++# CHECK-DAG: {{0*}}20000 l .text2 {{0*}}14 .Ltext2_start ++ ++# CHECK: <.Ltext_start>: ++# CHECK-NEXT: break 1 ++# CHECK-NEXT: break 2 ++# CHECK-NEXT: nop ++# CHECK-NEXT: nop ++# CHECK-NEXT: break 3 ++# CHECK-NEXT: break 4 ++# CHECK-NEXT: nop ++# CHECK-NEXT: nop ++# CHECK-NEXT: pcalau12i $a0, 0 ++# CHECK-NEXT: addi.{{[dw]}} $a0, $a0, 0 ++# CHECK-NEXT: pcalau12i $a0, 0 ++# CHECK-NEXT: addi.{{[dw]}} $a0, $a0, 56 ++# CHECK-NEXT: pcalau12i $a0, 0 ++# CHECK-NEXT: addi.{{[dw]}} $a0, $a0, 64 ++# CHECK-EMPTY: ++# CHECK-NEXT: <.L1>: ++# CHECK-NEXT: nop ++# CHECK-NEXT: nop ++# CHECK-EMPTY: ++# CHECK-NEXT: <.L2>: ++# CHECK-NEXT: break 5 ++ ++# CHECK: <.Ltext2_start>: ++# CHECK-NEXT: pcalau12i $a0, 0 ++# CHECK-NEXT: addi.{{[dw]}} $a0, $a0, 0 ++# CHECK-NEXT: nop ++# CHECK-NEXT: nop ++# CHECK-NEXT: break 6 ++ ++# CHECKR: <.Ltext2_start>: ++# CHECKR-NEXT: pcalau12i $a0, 0 ++# CHECKR-NEXT: {{0*}}00: R_LARCH_PCALA_HI20 .Ltext2_start ++# CHECKR-NEXT: {{0*}}00: R_LARCH_RELAX *ABS* ++# CHECKR-NEXT: addi.d $a0, $a0, 0 ++# CHECKR-NEXT: {{0*}}04: R_LARCH_PCALA_LO12 .Ltext2_start ++# CHECKR-NEXT: {{0*}}04: R_LARCH_RELAX *ABS* ++# CHECKR-NEXT: nop ++# CHECKR-NEXT: {{0*}}08: R_LARCH_ALIGN .Lalign_symbol+0x4 ++# CHECKR-NEXT: nop ++# CHECKR-NEXT: nop ++# CHECKR-NEXT: break 6 ++ ++.macro .fake_p2align_4 max=0 ++ .ifdef old ++ .if \max==0 ++ .reloc ., R_LARCH_ALIGN, 0xc ++ nop; nop; nop ++ .endif ++ .else ++ .reloc ., R_LARCH_ALIGN, .Lalign_symbol + 0x4 + (\max << 8) ++ nop; nop; nop ++ .endif ++.endm ++ ++ .text ++.Lalign_symbol: ++.Ltext_start: ++ break 1 ++ break 2 ++## +0x8: Emit 2 nops, delete 1 nop. ++ .fake_p2align_4 ++ ++ break 3 ++## +0x14: Emit 3 nops > 8 bytes, not emit. ++ .fake_p2align_4 8 ++ ++ break 4 ++ .fake_p2align_4 8 ++## +0x18: Emit 2 nops <= 8 bytes. ++ ++## Compensate ++.ifdef old ++ nop; nop ++.endif ++ ++## +0x20: Test symbol value and symbol size can be handled. ++ la.pcrel $a0, .Ltext_start ++ la.pcrel $a0, .L1 ++ la.pcrel $a0, .L2 ++ ++## +0x38: Emit 2 nops, delete 1 nop. ++.L1: ++ .fake_p2align_4 ++.L2: ++ break 5 ++ .size .L1, . - .L1 ++ .size .L2, . - .L2 ++ .size .Ltext_start, . - .Ltext_start ++ ++## Test another text section. ++ .section .text2,"ax",@progbits ++.Ltext2_start: ++ la.pcrel $a0, .Ltext2_start ++ .fake_p2align_4 ++ break 6 ++ .size .Ltext2_start, . - .Ltext2_start +diff --git a/lld/test/ELF/loongarch-relax-emit-relocs.s b/lld/test/ELF/loongarch-relax-emit-relocs.s +new file mode 100644 +index 000000000000..581fce8c95ca +--- /dev/null ++++ b/lld/test/ELF/loongarch-relax-emit-relocs.s +@@ -0,0 +1,49 @@ ++# REQUIRES: loongarch ++## Test that we can handle --emit-relocs while relaxing. ++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch32 --mattr=+relax %s -o %t.32.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o %t.64.o ++# RUN: ld.lld -Ttext=0x10000 --emit-relocs %t.32.o -o %t.32 ++# RUN: ld.lld -Ttext=0x10000 --emit-relocs %t.64.o -o %t.64 ++# RUN: llvm-objdump -dr %t.32 | FileCheck %s ++# RUN: llvm-objdump -dr %t.64 | FileCheck %s ++ ++## -r should keep original relocations. ++# RUN: ld.lld -r %t.64.o -o %t.64.r ++# RUN: llvm-objdump -dr %t.64.r | FileCheck %s --check-prefix=CHECKR ++ ++## --no-relax should keep original relocations. 
++## TODO Due to R_LARCH_RELAX is not relaxed, it plays same as --relax now. ++# RUN: ld.lld -Ttext=0x10000 --emit-relocs --no-relax %t.64.o -o %t.64.norelax ++# RUN: llvm-objdump -dr %t.64.norelax | FileCheck %s ++ ++# CHECK: 00010000 <_start>: ++# CHECK-NEXT: pcalau12i $a0, 0 ++# CHECK-NEXT: R_LARCH_PCALA_HI20 _start ++# CHECK-NEXT: R_LARCH_RELAX *ABS* ++# CHECK-NEXT: addi.{{[dw]}} $a0, $a0, 0 ++# CHECK-NEXT: R_LARCH_PCALA_LO12 _start ++# CHECK-NEXT: R_LARCH_RELAX *ABS* ++# CHECK-NEXT: nop ++# CHECK-NEXT: R_LARCH_ALIGN .Lla-relax-align0+0x4 ++# CHECK-NEXT: nop ++# CHECK-NEXT: ret ++ ++# CHECKR: <_start>: ++# CHECKR-NEXT: pcalau12i $a0, 0 ++# CHECKR-NEXT: R_LARCH_PCALA_HI20 _start ++# CHECKR-NEXT: R_LARCH_RELAX *ABS* ++# CHECKR-NEXT: addi.d $a0, $a0, 0 ++# CHECKR-NEXT: R_LARCH_PCALA_LO12 _start ++# CHECKR-NEXT: R_LARCH_RELAX *ABS* ++# CHECKR-NEXT: nop ++# CHECKR-NEXT: R_LARCH_ALIGN .Lla-relax-align0+0x4 ++# CHECKR-NEXT: nop ++# CHECKR-NEXT: nop ++# CHECKR-NEXT: ret ++ ++.global _start ++_start: ++ la.pcrel $a0, _start ++ .p2align 4 ++ ret +-- +2.20.1 + diff --git a/0011-Backport-LoongArch-Add-the-support-for-vector-in-llvm17.patch b/0011-Backport-LoongArch-Add-the-support-for-vector-in-llvm17.patch deleted file mode 100644 index 0976d4e..0000000 --- a/0011-Backport-LoongArch-Add-the-support-for-vector-in-llvm17.patch +++ /dev/null @@ -1,56520 +0,0 @@ -From 6ff32ae0ca7a400249535b19d9ca489b44deae19 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Wed, 9 Aug 2023 16:01:37 +0800 -Subject: [PATCH 01/35] [Clang][LoongArch] Use the ClangBuiltin class to - automatically generate support for CBE and CFE - -Fixed the type modifier (L->W), removed redundant feature checking code -since the feature has already been checked in `EmitBuiltinExpr`. And -Cleaned up unused diagnostic information. 
- -Reviewed By: SixWeining - -Differential Revision: https://reviews.llvm.org/D156866 - -(cherry picked from commit ea8d3b1f9f2d7385d97fcd34d14db0eb2cb2795c) ---- - llvm/include/llvm/IR/IntrinsicsLoongArch.td | 141 ++++++++++---------- - llvm/lib/IR/Function.cpp | 1 + - 2 files changed, 72 insertions(+), 70 deletions(-) - -diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td -index 5edce3c529e1..4219b2f55346 100644 ---- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td -+++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td -@@ -51,74 +51,75 @@ defm int_loongarch_masked_cmpxchg : MaskedAtomicRMWFiveOpIntrinsics; - //===----------------------------------------------------------------------===// - // LoongArch BASE - --def int_loongarch_break : Intrinsic<[], [llvm_i32_ty], [ImmArg>]>; --def int_loongarch_cacop_d : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], -- [ImmArg>, ImmArg>]>; --def int_loongarch_cacop_w : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], -- [ImmArg>, ImmArg>]>; --def int_loongarch_dbar : Intrinsic<[], [llvm_i32_ty], [ImmArg>]>; --def int_loongarch_ibar : Intrinsic<[], [llvm_i32_ty], [ImmArg>]>; --def int_loongarch_movfcsr2gr : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], -- [ImmArg>]>; --def int_loongarch_movgr2fcsr : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], -- [ImmArg>]>; --def int_loongarch_syscall : Intrinsic<[], [llvm_i32_ty], [ImmArg>]>; -- --def int_loongarch_crc_w_b_w : Intrinsic<[llvm_i32_ty], -- [llvm_i32_ty, llvm_i32_ty]>; --def int_loongarch_crc_w_h_w : Intrinsic<[llvm_i32_ty], -- [llvm_i32_ty, llvm_i32_ty]>; --def int_loongarch_crc_w_w_w : Intrinsic<[llvm_i32_ty], -- [llvm_i32_ty, llvm_i32_ty]>; --def int_loongarch_crc_w_d_w : Intrinsic<[llvm_i32_ty], -- [llvm_i64_ty, llvm_i32_ty]>; -- --def int_loongarch_crcc_w_b_w : Intrinsic<[llvm_i32_ty], -- [llvm_i32_ty, llvm_i32_ty]>; --def int_loongarch_crcc_w_h_w : Intrinsic<[llvm_i32_ty], -- [llvm_i32_ty, llvm_i32_ty]>; --def int_loongarch_crcc_w_w_w : Intrinsic<[llvm_i32_ty], -- [llvm_i32_ty, llvm_i32_ty]>; --def int_loongarch_crcc_w_d_w : Intrinsic<[llvm_i32_ty], -- [llvm_i64_ty, llvm_i32_ty]>; -- --def int_loongarch_csrrd_w : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], -- [ImmArg>]>; --def int_loongarch_csrrd_d : Intrinsic<[llvm_i64_ty], [llvm_i32_ty], -- [ImmArg>]>; --def int_loongarch_csrwr_w : Intrinsic<[llvm_i32_ty], -- [llvm_i32_ty, llvm_i32_ty], -- [ImmArg>]>; --def int_loongarch_csrwr_d : Intrinsic<[llvm_i64_ty], -- [llvm_i64_ty, llvm_i32_ty], -- [ImmArg>]>; --def int_loongarch_csrxchg_w : Intrinsic<[llvm_i32_ty], -- [llvm_i32_ty, llvm_i32_ty, -- llvm_i32_ty], -- [ImmArg>]>; --def int_loongarch_csrxchg_d : Intrinsic<[llvm_i64_ty], -- [llvm_i64_ty, llvm_i64_ty, -- llvm_i32_ty], -- [ImmArg>]>; -- --def int_loongarch_iocsrrd_b : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>; --def int_loongarch_iocsrrd_h : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>; --def int_loongarch_iocsrrd_w : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>; --def int_loongarch_iocsrrd_d : Intrinsic<[llvm_i64_ty], [llvm_i32_ty]>; -- --def int_loongarch_iocsrwr_b : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty]>; --def int_loongarch_iocsrwr_h : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty]>; --def int_loongarch_iocsrwr_w : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty]>; --def int_loongarch_iocsrwr_d : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty]>; -- --def int_loongarch_cpucfg : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>; -- --def int_loongarch_asrtle_d : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty]>; --def int_loongarch_asrtgt_d : 
Intrinsic<[], [llvm_i64_ty, llvm_i64_ty]>; -- --def int_loongarch_lddir_d : Intrinsic<[llvm_i64_ty], -- [llvm_i64_ty, llvm_i64_ty], -- [ImmArg>]>; --def int_loongarch_ldpte_d : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty], -- [ImmArg>]>; -+class BaseInt ret_types, list param_types, -+ list intr_properties = []> -+ : Intrinsic, -+ ClangBuiltin; -+ -+def int_loongarch_break : BaseInt<[], [llvm_i32_ty], [ImmArg>]>; -+def int_loongarch_cacop_d : BaseInt<[], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], -+ [ImmArg>, ImmArg>]>; -+def int_loongarch_cacop_w : BaseInt<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], -+ [ImmArg>, ImmArg>]>; -+def int_loongarch_dbar : BaseInt<[], [llvm_i32_ty], [ImmArg>]>; -+ -+def int_loongarch_ibar : BaseInt<[], [llvm_i32_ty], [ImmArg>]>; -+def int_loongarch_movfcsr2gr : BaseInt<[llvm_i32_ty], [llvm_i32_ty], -+ [ImmArg>]>; -+def int_loongarch_movgr2fcsr : BaseInt<[], [llvm_i32_ty, llvm_i32_ty], -+ [ImmArg>]>; -+def int_loongarch_syscall : BaseInt<[], [llvm_i32_ty], [ImmArg>]>; -+ -+def int_loongarch_crc_w_b_w : BaseInt<[llvm_i32_ty], -+ [llvm_i32_ty, llvm_i32_ty]>; -+def int_loongarch_crc_w_h_w : BaseInt<[llvm_i32_ty], -+ [llvm_i32_ty, llvm_i32_ty]>; -+def int_loongarch_crc_w_w_w : BaseInt<[llvm_i32_ty], -+ [llvm_i32_ty, llvm_i32_ty]>; -+def int_loongarch_crc_w_d_w : BaseInt<[llvm_i32_ty], -+ [llvm_i64_ty, llvm_i32_ty]>; -+ -+def int_loongarch_crcc_w_b_w : BaseInt<[llvm_i32_ty], -+ [llvm_i32_ty, llvm_i32_ty]>; -+def int_loongarch_crcc_w_h_w : BaseInt<[llvm_i32_ty], -+ [llvm_i32_ty, llvm_i32_ty]>; -+def int_loongarch_crcc_w_w_w : BaseInt<[llvm_i32_ty], -+ [llvm_i32_ty, llvm_i32_ty]>; -+def int_loongarch_crcc_w_d_w : BaseInt<[llvm_i32_ty], -+ [llvm_i64_ty, llvm_i32_ty]>; -+ -+def int_loongarch_csrrd_w : BaseInt<[llvm_i32_ty], [llvm_i32_ty], -+ [ImmArg>]>; -+def int_loongarch_csrrd_d : BaseInt<[llvm_i64_ty], [llvm_i32_ty], -+ [ImmArg>]>; -+def int_loongarch_csrwr_w : BaseInt<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], -+ [ImmArg>]>; -+def int_loongarch_csrwr_d : BaseInt<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty], -+ [ImmArg>]>; -+def int_loongarch_csrxchg_w : BaseInt<[llvm_i32_ty], -+ [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], -+ [ImmArg>]>; -+def int_loongarch_csrxchg_d : BaseInt<[llvm_i64_ty], -+ [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], -+ [ImmArg>]>; -+ -+def int_loongarch_iocsrrd_b : BaseInt<[llvm_i32_ty], [llvm_i32_ty]>; -+def int_loongarch_iocsrrd_h : BaseInt<[llvm_i32_ty], [llvm_i32_ty]>; -+def int_loongarch_iocsrrd_w : BaseInt<[llvm_i32_ty], [llvm_i32_ty]>; -+def int_loongarch_iocsrrd_d : BaseInt<[llvm_i64_ty], [llvm_i32_ty]>; -+ -+def int_loongarch_iocsrwr_b : BaseInt<[], [llvm_i32_ty, llvm_i32_ty]>; -+def int_loongarch_iocsrwr_h : BaseInt<[], [llvm_i32_ty, llvm_i32_ty]>; -+def int_loongarch_iocsrwr_w : BaseInt<[], [llvm_i32_ty, llvm_i32_ty]>; -+def int_loongarch_iocsrwr_d : BaseInt<[], [llvm_i64_ty, llvm_i32_ty]>; -+ -+def int_loongarch_cpucfg : BaseInt<[llvm_i32_ty], [llvm_i32_ty]>; -+ -+def int_loongarch_asrtle_d : BaseInt<[], [llvm_i64_ty, llvm_i64_ty]>; -+def int_loongarch_asrtgt_d : BaseInt<[], [llvm_i64_ty, llvm_i64_ty]>; -+ -+def int_loongarch_lddir_d : BaseInt<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], -+ [ImmArg>]>; -+def int_loongarch_ldpte_d : BaseInt<[], [llvm_i64_ty, llvm_i64_ty], -+ [ImmArg>]>; - } // TargetPrefix = "loongarch" -diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp -index 27219e89dc5f..435800d9e5f9 100644 ---- a/llvm/lib/IR/Function.cpp -+++ b/llvm/lib/IR/Function.cpp -@@ -37,6 +37,7 @@ - #include "llvm/IR/IntrinsicsBPF.h" - #include 
"llvm/IR/IntrinsicsDirectX.h" - #include "llvm/IR/IntrinsicsHexagon.h" -+#include "llvm/IR/IntrinsicsLoongArch.h" - #include "llvm/IR/IntrinsicsMips.h" - #include "llvm/IR/IntrinsicsNVPTX.h" - #include "llvm/IR/IntrinsicsPowerPC.h" --- -2.20.1 - - -From fca9d0a876fb72d3b483044a7616d27a47121512 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Sat, 19 Aug 2023 15:58:38 +0800 -Subject: [PATCH 02/35] [LoongArch] Add LSX intrinsic support - -For handling intrinsics, our approach is not simply to match them -one-to-one with instructions. Instead, we lower some intrinsics -to common nodes and then perform matching. The advantage of this -approach is that it allows us to fully utilize the passes available -at the common layer for optimizing purposes. - -We perform error checks on the immediate operand of all intrinsics, -rather than waiting until the end to throw exceptions. - -Reviewed By: SixWeining - -Differential Revision: https://reviews.llvm.org/D155829 - -(cherry picked from commit 53141b2fcfa20616970833e6513537d211116c05) ---- - llvm/include/llvm/IR/IntrinsicsLoongArch.td | 524 ++++++++++ - .../LoongArch/LoongArchISelDAGToDAG.cpp | 100 +- - .../Target/LoongArch/LoongArchISelDAGToDAG.h | 8 + - .../LoongArch/LoongArchISelLowering.cpp | 902 +++++++++++++++++- - .../Target/LoongArch/LoongArchISelLowering.h | 14 + - .../Target/LoongArch/LoongArchInstrInfo.cpp | 12 + - .../Target/LoongArch/LoongArchInstrInfo.td | 6 +- - .../Target/LoongArch/LoongArchLSXInstrInfo.td | 816 ++++++++++++++++ - 8 files changed, 2359 insertions(+), 23 deletions(-) - -diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td -index 4219b2f55346..d39d8261ebe3 100644 ---- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td -+++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td -@@ -123,3 +123,527 @@ def int_loongarch_lddir_d : BaseInt<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], - def int_loongarch_ldpte_d : BaseInt<[], [llvm_i64_ty, llvm_i64_ty], - [ImmArg>]>; - } // TargetPrefix = "loongarch" -+ -+/// Vector intrinsic -+ -+class VecInt ret_types, list param_types, -+ list intr_properties = []> -+ : Intrinsic, -+ ClangBuiltin; -+ -+//===----------------------------------------------------------------------===// -+// LSX -+ -+let TargetPrefix = "loongarch" in { -+ -+foreach inst = ["vadd_b", "vsub_b", -+ "vsadd_b", "vsadd_bu", "vssub_b", "vssub_bu", -+ "vavg_b", "vavg_bu", "vavgr_b", "vavgr_bu", -+ "vabsd_b", "vabsd_bu", "vadda_b", -+ "vmax_b", "vmax_bu", "vmin_b", "vmin_bu", -+ "vmul_b", "vmuh_b", "vmuh_bu", -+ "vdiv_b", "vdiv_bu", "vmod_b", "vmod_bu", "vsigncov_b", -+ "vand_v", "vor_v", "vxor_v", "vnor_v", "vandn_v", "vorn_v", -+ "vsll_b", "vsrl_b", "vsra_b", "vrotr_b", "vsrlr_b", "vsrar_b", -+ "vbitclr_b", "vbitset_b", "vbitrev_b", -+ "vseq_b", "vsle_b", "vsle_bu", "vslt_b", "vslt_bu", -+ "vpackev_b", "vpackod_b", "vpickev_b", "vpickod_b", -+ "vilvl_b", "vilvh_b"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v16i8_ty], -+ [llvm_v16i8_ty, llvm_v16i8_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vadd_h", "vsub_h", -+ "vsadd_h", "vsadd_hu", "vssub_h", "vssub_hu", -+ "vavg_h", "vavg_hu", "vavgr_h", "vavgr_hu", -+ "vabsd_h", "vabsd_hu", "vadda_h", -+ "vmax_h", "vmax_hu", "vmin_h", "vmin_hu", -+ "vmul_h", "vmuh_h", "vmuh_hu", -+ "vdiv_h", "vdiv_hu", "vmod_h", "vmod_hu", "vsigncov_h", -+ "vsll_h", "vsrl_h", "vsra_h", "vrotr_h", "vsrlr_h", "vsrar_h", -+ "vbitclr_h", "vbitset_h", "vbitrev_h", -+ "vseq_h", "vsle_h", "vsle_hu", "vslt_h", "vslt_hu", -+ "vpackev_h", "vpackod_h", "vpickev_h", 
"vpickod_h", -+ "vilvl_h", "vilvh_h"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], -+ [llvm_v8i16_ty, llvm_v8i16_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vadd_w", "vsub_w", -+ "vsadd_w", "vsadd_wu", "vssub_w", "vssub_wu", -+ "vavg_w", "vavg_wu", "vavgr_w", "vavgr_wu", -+ "vabsd_w", "vabsd_wu", "vadda_w", -+ "vmax_w", "vmax_wu", "vmin_w", "vmin_wu", -+ "vmul_w", "vmuh_w", "vmuh_wu", -+ "vdiv_w", "vdiv_wu", "vmod_w", "vmod_wu", "vsigncov_w", -+ "vsll_w", "vsrl_w", "vsra_w", "vrotr_w", "vsrlr_w", "vsrar_w", -+ "vbitclr_w", "vbitset_w", "vbitrev_w", -+ "vseq_w", "vsle_w", "vsle_wu", "vslt_w", "vslt_wu", -+ "vpackev_w", "vpackod_w", "vpickev_w", "vpickod_w", -+ "vilvl_w", "vilvh_w"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], -+ [llvm_v4i32_ty, llvm_v4i32_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vadd_d", "vadd_q", "vsub_d", "vsub_q", -+ "vsadd_d", "vsadd_du", "vssub_d", "vssub_du", -+ "vhaddw_q_d", "vhaddw_qu_du", "vhsubw_q_d", "vhsubw_qu_du", -+ "vaddwev_q_d", "vaddwod_q_d", "vsubwev_q_d", "vsubwod_q_d", -+ "vaddwev_q_du", "vaddwod_q_du", "vsubwev_q_du", "vsubwod_q_du", -+ "vaddwev_q_du_d", "vaddwod_q_du_d", -+ "vavg_d", "vavg_du", "vavgr_d", "vavgr_du", -+ "vabsd_d", "vabsd_du", "vadda_d", -+ "vmax_d", "vmax_du", "vmin_d", "vmin_du", -+ "vmul_d", "vmuh_d", "vmuh_du", -+ "vmulwev_q_d", "vmulwod_q_d", "vmulwev_q_du", "vmulwod_q_du", -+ "vmulwev_q_du_d", "vmulwod_q_du_d", -+ "vdiv_d", "vdiv_du", "vmod_d", "vmod_du", "vsigncov_d", -+ "vsll_d", "vsrl_d", "vsra_d", "vrotr_d", "vsrlr_d", "vsrar_d", -+ "vbitclr_d", "vbitset_d", "vbitrev_d", -+ "vseq_d", "vsle_d", "vsle_du", "vslt_d", "vslt_du", -+ "vpackev_d", "vpackod_d", "vpickev_d", "vpickod_d", -+ "vilvl_d", "vilvh_d"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], -+ [llvm_v2i64_ty, llvm_v2i64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vaddi_bu", "vsubi_bu", -+ "vmaxi_b", "vmaxi_bu", "vmini_b", "vmini_bu", -+ "vsat_b", "vsat_bu", -+ "vandi_b", "vori_b", "vxori_b", "vnori_b", -+ "vslli_b", "vsrli_b", "vsrai_b", "vrotri_b", -+ "vsrlri_b", "vsrari_b", -+ "vbitclri_b", "vbitseti_b", "vbitrevi_b", -+ "vseqi_b", "vslei_b", "vslei_bu", "vslti_b", "vslti_bu", -+ "vreplvei_b", "vbsll_v", "vbsrl_v", "vshuf4i_b"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v16i8_ty], -+ [llvm_v16i8_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["vaddi_hu", "vsubi_hu", -+ "vmaxi_h", "vmaxi_hu", "vmini_h", "vmini_hu", -+ "vsat_h", "vsat_hu", -+ "vslli_h", "vsrli_h", "vsrai_h", "vrotri_h", -+ "vsrlri_h", "vsrari_h", -+ "vbitclri_h", "vbitseti_h", "vbitrevi_h", -+ "vseqi_h", "vslei_h", "vslei_hu", "vslti_h", "vslti_hu", -+ "vreplvei_h", "vshuf4i_h"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], -+ [llvm_v8i16_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["vaddi_wu", "vsubi_wu", -+ "vmaxi_w", "vmaxi_wu", "vmini_w", "vmini_wu", -+ "vsat_w", "vsat_wu", -+ "vslli_w", "vsrli_w", "vsrai_w", "vrotri_w", -+ "vsrlri_w", "vsrari_w", -+ "vbitclri_w", "vbitseti_w", "vbitrevi_w", -+ "vseqi_w", "vslei_w", "vslei_wu", "vslti_w", "vslti_wu", -+ "vreplvei_w", "vshuf4i_w"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], -+ [llvm_v4i32_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["vaddi_du", "vsubi_du", -+ "vmaxi_d", "vmaxi_du", "vmini_d", "vmini_du", -+ "vsat_d", "vsat_du", -+ "vslli_d", "vsrli_d", "vsrai_d", "vrotri_d", -+ "vsrlri_d", "vsrari_d", -+ "vbitclri_d", "vbitseti_d", "vbitrevi_d", -+ "vseqi_d", "vslei_d", "vslei_du", "vslti_d", "vslti_du", -+ "vreplvei_d"] 
in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], -+ [llvm_v2i64_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+ -+foreach inst = ["vhaddw_h_b", "vhaddw_hu_bu", "vhsubw_h_b", "vhsubw_hu_bu", -+ "vaddwev_h_b", "vaddwod_h_b", "vsubwev_h_b", "vsubwod_h_b", -+ "vaddwev_h_bu", "vaddwod_h_bu", "vsubwev_h_bu", "vsubwod_h_bu", -+ "vaddwev_h_bu_b", "vaddwod_h_bu_b", -+ "vmulwev_h_b", "vmulwod_h_b", "vmulwev_h_bu", "vmulwod_h_bu", -+ "vmulwev_h_bu_b", "vmulwod_h_bu_b"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], -+ [llvm_v16i8_ty, llvm_v16i8_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vhaddw_w_h", "vhaddw_wu_hu", "vhsubw_w_h", "vhsubw_wu_hu", -+ "vaddwev_w_h", "vaddwod_w_h", "vsubwev_w_h", "vsubwod_w_h", -+ "vaddwev_w_hu", "vaddwod_w_hu", "vsubwev_w_hu", "vsubwod_w_hu", -+ "vaddwev_w_hu_h", "vaddwod_w_hu_h", -+ "vmulwev_w_h", "vmulwod_w_h", "vmulwev_w_hu", "vmulwod_w_hu", -+ "vmulwev_w_hu_h", "vmulwod_w_hu_h"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], -+ [llvm_v8i16_ty, llvm_v8i16_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vhaddw_d_w", "vhaddw_du_wu", "vhsubw_d_w", "vhsubw_du_wu", -+ "vaddwev_d_w", "vaddwod_d_w", "vsubwev_d_w", "vsubwod_d_w", -+ "vaddwev_d_wu", "vaddwod_d_wu", "vsubwev_d_wu", "vsubwod_d_wu", -+ "vaddwev_d_wu_w", "vaddwod_d_wu_w", -+ "vmulwev_d_w", "vmulwod_d_w", "vmulwev_d_wu", "vmulwod_d_wu", -+ "vmulwev_d_wu_w", "vmulwod_d_wu_w"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], -+ [llvm_v4i32_ty, llvm_v4i32_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vsrln_b_h", "vsran_b_h", "vsrlrn_b_h", "vsrarn_b_h", -+ "vssrln_b_h", "vssran_b_h", "vssrln_bu_h", "vssran_bu_h", -+ "vssrlrn_b_h", "vssrarn_b_h", "vssrlrn_bu_h", "vssrarn_bu_h"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v16i8_ty], -+ [llvm_v8i16_ty, llvm_v8i16_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vsrln_h_w", "vsran_h_w", "vsrlrn_h_w", "vsrarn_h_w", -+ "vssrln_h_w", "vssran_h_w", "vssrln_hu_w", "vssran_hu_w", -+ "vssrlrn_h_w", "vssrarn_h_w", "vssrlrn_hu_w", "vssrarn_hu_w"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], -+ [llvm_v4i32_ty, llvm_v4i32_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vsrln_w_d", "vsran_w_d", "vsrlrn_w_d", "vsrarn_w_d", -+ "vssrln_w_d", "vssran_w_d", "vssrln_wu_d", "vssran_wu_d", -+ "vssrlrn_w_d", "vssrarn_w_d", "vssrlrn_wu_d", "vssrarn_wu_d"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], -+ [llvm_v2i64_ty, llvm_v2i64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vmadd_b", "vmsub_b", "vfrstp_b", "vbitsel_v", "vshuf_b"] in -+ def int_loongarch_lsx_#inst -+ : VecInt<[llvm_v16i8_ty], -+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], -+ [IntrNoMem]>; -+foreach inst = ["vmadd_h", "vmsub_h", "vfrstp_h", "vshuf_h"] in -+ def int_loongarch_lsx_#inst -+ : VecInt<[llvm_v8i16_ty], -+ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], -+ [IntrNoMem]>; -+foreach inst = ["vmadd_w", "vmsub_w", "vshuf_w"] in -+ def int_loongarch_lsx_#inst -+ : VecInt<[llvm_v4i32_ty], -+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], -+ [IntrNoMem]>; -+foreach inst = ["vmadd_d", "vmsub_d", "vshuf_d"] in -+ def int_loongarch_lsx_#inst -+ : VecInt<[llvm_v2i64_ty], -+ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vsrlni_b_h", "vsrani_b_h", "vsrlrni_b_h", "vsrarni_b_h", -+ "vssrlni_b_h", "vssrani_b_h", "vssrlni_bu_h", "vssrani_bu_h", -+ "vssrlrni_b_h", "vssrarni_b_h", "vssrlrni_bu_h", "vssrarni_bu_h", -+ "vfrstpi_b", "vbitseli_b", "vextrins_b"] in -+ def int_loongarch_lsx_#inst -+ : VecInt<[llvm_v16i8_ty], -+ 
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["vsrlni_h_w", "vsrani_h_w", "vsrlrni_h_w", "vsrarni_h_w", -+ "vssrlni_h_w", "vssrani_h_w", "vssrlni_hu_w", "vssrani_hu_w", -+ "vssrlrni_h_w", "vssrarni_h_w", "vssrlrni_hu_w", "vssrarni_hu_w", -+ "vfrstpi_h", "vextrins_h"] in -+ def int_loongarch_lsx_#inst -+ : VecInt<[llvm_v8i16_ty], -+ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["vsrlni_w_d", "vsrani_w_d", "vsrlrni_w_d", "vsrarni_w_d", -+ "vssrlni_w_d", "vssrani_w_d", "vssrlni_wu_d", "vssrani_wu_d", -+ "vssrlrni_w_d", "vssrarni_w_d", "vssrlrni_wu_d", "vssrarni_wu_d", -+ "vpermi_w", "vextrins_w"] in -+ def int_loongarch_lsx_#inst -+ : VecInt<[llvm_v4i32_ty], -+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["vsrlni_d_q", "vsrani_d_q", "vsrlrni_d_q", "vsrarni_d_q", -+ "vssrlni_d_q", "vssrani_d_q", "vssrlni_du_q", "vssrani_du_q", -+ "vssrlrni_d_q", "vssrarni_d_q", "vssrlrni_du_q", "vssrarni_du_q", -+ "vshuf4i_d", "vextrins_d"] in -+ def int_loongarch_lsx_#inst -+ : VecInt<[llvm_v2i64_ty], -+ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+ -+foreach inst = ["vmaddwev_h_b", "vmaddwod_h_b", "vmaddwev_h_bu", -+ "vmaddwod_h_bu", "vmaddwev_h_bu_b", "vmaddwod_h_bu_b"] in -+ def int_loongarch_lsx_#inst -+ : VecInt<[llvm_v8i16_ty], -+ [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], -+ [IntrNoMem]>; -+foreach inst = ["vmaddwev_w_h", "vmaddwod_w_h", "vmaddwev_w_hu", -+ "vmaddwod_w_hu", "vmaddwev_w_hu_h", "vmaddwod_w_hu_h"] in -+ def int_loongarch_lsx_#inst -+ : VecInt<[llvm_v4i32_ty], -+ [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], -+ [IntrNoMem]>; -+foreach inst = ["vmaddwev_d_w", "vmaddwod_d_w", "vmaddwev_d_wu", -+ "vmaddwod_d_wu", "vmaddwev_d_wu_w", "vmaddwod_d_wu_w"] in -+ def int_loongarch_lsx_#inst -+ : VecInt<[llvm_v2i64_ty], -+ [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], -+ [IntrNoMem]>; -+foreach inst = ["vmaddwev_q_d", "vmaddwod_q_d", "vmaddwev_q_du", -+ "vmaddwod_q_du", "vmaddwev_q_du_d", "vmaddwod_q_du_d"] in -+ def int_loongarch_lsx_#inst -+ : VecInt<[llvm_v2i64_ty], -+ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vsllwil_h_b", "vsllwil_hu_bu"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], -+ [llvm_v16i8_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["vsllwil_w_h", "vsllwil_wu_hu"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], -+ [llvm_v8i16_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["vsllwil_d_w", "vsllwil_du_wu"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], -+ [llvm_v4i32_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+ -+foreach inst = ["vneg_b", "vmskltz_b", "vmskgez_b", "vmsknz_b", -+ "vclo_b", "vclz_b", "vpcnt_b"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v16i8_ty], [llvm_v16i8_ty], -+ [IntrNoMem]>; -+foreach inst = ["vneg_h", "vmskltz_h", "vclo_h", "vclz_h", "vpcnt_h"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], [llvm_v8i16_ty], -+ [IntrNoMem]>; -+foreach inst = ["vneg_w", "vmskltz_w", "vclo_w", "vclz_w", "vpcnt_w"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], [llvm_v4i32_ty], -+ [IntrNoMem]>; -+foreach inst = ["vneg_d", "vexth_q_d", "vexth_qu_du", "vmskltz_d", -+ "vextl_q_d", "vextl_qu_du", "vclo_d", "vclz_d", "vpcnt_d"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], [llvm_v2i64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vexth_h_b", "vexth_hu_bu"] in -+ def 
int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], [llvm_v16i8_ty], -+ [IntrNoMem]>; -+foreach inst = ["vexth_w_h", "vexth_wu_hu"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], [llvm_v8i16_ty], -+ [IntrNoMem]>; -+foreach inst = ["vexth_d_w", "vexth_du_wu"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], [llvm_v4i32_ty], -+ [IntrNoMem]>; -+ -+def int_loongarch_lsx_vldi : VecInt<[llvm_v2i64_ty], [llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+def int_loongarch_lsx_vrepli_b : VecInt<[llvm_v16i8_ty], [llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+def int_loongarch_lsx_vrepli_h : VecInt<[llvm_v8i16_ty], [llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+def int_loongarch_lsx_vrepli_w : VecInt<[llvm_v4i32_ty], [llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+def int_loongarch_lsx_vrepli_d : VecInt<[llvm_v2i64_ty], [llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+ -+def int_loongarch_lsx_vreplgr2vr_b : VecInt<[llvm_v16i8_ty], [llvm_i32_ty], -+ [IntrNoMem]>; -+def int_loongarch_lsx_vreplgr2vr_h : VecInt<[llvm_v8i16_ty], [llvm_i32_ty], -+ [IntrNoMem]>; -+def int_loongarch_lsx_vreplgr2vr_w : VecInt<[llvm_v4i32_ty], [llvm_i32_ty], -+ [IntrNoMem]>; -+def int_loongarch_lsx_vreplgr2vr_d : VecInt<[llvm_v2i64_ty], [llvm_i64_ty], -+ [IntrNoMem]>; -+ -+def int_loongarch_lsx_vinsgr2vr_b -+ : VecInt<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+def int_loongarch_lsx_vinsgr2vr_h -+ : VecInt<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+def int_loongarch_lsx_vinsgr2vr_w -+ : VecInt<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+def int_loongarch_lsx_vinsgr2vr_d -+ : VecInt<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i64_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+ -+def int_loongarch_lsx_vreplve_b -+ : VecInt<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; -+def int_loongarch_lsx_vreplve_h -+ : VecInt<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; -+def int_loongarch_lsx_vreplve_w -+ : VecInt<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; -+def int_loongarch_lsx_vreplve_d -+ : VecInt<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; -+ -+foreach inst = ["vpickve2gr_b", "vpickve2gr_bu" ] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_i32_ty], -+ [llvm_v16i8_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["vpickve2gr_h", "vpickve2gr_hu" ] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_i32_ty], -+ [llvm_v8i16_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["vpickve2gr_w", "vpickve2gr_wu" ] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_i32_ty], -+ [llvm_v4i32_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["vpickve2gr_d", "vpickve2gr_du" ] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_i64_ty], -+ [llvm_v2i64_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+ -+def int_loongarch_lsx_bz_b : VecInt<[llvm_i32_ty], [llvm_v16i8_ty], -+ [IntrNoMem]>; -+def int_loongarch_lsx_bz_h : VecInt<[llvm_i32_ty], [llvm_v8i16_ty], -+ [IntrNoMem]>; -+def int_loongarch_lsx_bz_w : VecInt<[llvm_i32_ty], [llvm_v4i32_ty], -+ [IntrNoMem]>; -+def int_loongarch_lsx_bz_d : VecInt<[llvm_i32_ty], [llvm_v2i64_ty], -+ [IntrNoMem]>; -+def int_loongarch_lsx_bz_v : VecInt<[llvm_i32_ty], [llvm_v16i8_ty], -+ [IntrNoMem]>; -+ -+def int_loongarch_lsx_bnz_v : VecInt<[llvm_i32_ty], [llvm_v16i8_ty], -+ [IntrNoMem]>; -+def int_loongarch_lsx_bnz_b : VecInt<[llvm_i32_ty], [llvm_v16i8_ty], -+ [IntrNoMem]>; -+def int_loongarch_lsx_bnz_h : 
VecInt<[llvm_i32_ty], [llvm_v8i16_ty], -+ [IntrNoMem]>; -+def int_loongarch_lsx_bnz_w : VecInt<[llvm_i32_ty], [llvm_v4i32_ty], -+ [IntrNoMem]>; -+def int_loongarch_lsx_bnz_d : VecInt<[llvm_i32_ty], [llvm_v2i64_ty], -+ [IntrNoMem]>; -+ -+// LSX Float -+ -+foreach inst = ["vfadd_s", "vfsub_s", "vfmul_s", "vfdiv_s", -+ "vfmax_s", "vfmin_s", "vfmaxa_s", "vfmina_s"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], -+ [llvm_v4f32_ty, llvm_v4f32_ty], -+ [IntrNoMem]>; -+foreach inst = ["vfadd_d", "vfsub_d", "vfmul_d", "vfdiv_d", -+ "vfmax_d", "vfmin_d", "vfmaxa_d", "vfmina_d"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v2f64_ty], -+ [llvm_v2f64_ty, llvm_v2f64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vfmadd_s", "vfmsub_s", "vfnmadd_s", "vfnmsub_s"] in -+ def int_loongarch_lsx_#inst -+ : VecInt<[llvm_v4f32_ty], -+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], -+ [IntrNoMem]>; -+foreach inst = ["vfmadd_d", "vfmsub_d", "vfnmadd_d", "vfnmsub_d"] in -+ def int_loongarch_lsx_#inst -+ : VecInt<[llvm_v2f64_ty], -+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vflogb_s", "vfsqrt_s", "vfrecip_s", "vfrsqrt_s", "vfrint_s", -+ "vfrintrne_s", "vfrintrz_s", "vfrintrp_s", "vfrintrm_s"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], [llvm_v4f32_ty], -+ [IntrNoMem]>; -+foreach inst = ["vflogb_d", "vfsqrt_d", "vfrecip_d", "vfrsqrt_d", "vfrint_d", -+ "vfrintrne_d", "vfrintrz_d", "vfrintrp_d", "vfrintrm_d"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v2f64_ty], [llvm_v2f64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vfcvtl_s_h", "vfcvth_s_h"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], [llvm_v8i16_ty], -+ [IntrNoMem]>; -+foreach inst = ["vfcvtl_d_s", "vfcvth_d_s"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v2f64_ty], [llvm_v4f32_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vftintrne_w_s", "vftintrz_w_s", "vftintrp_w_s", "vftintrm_w_s", -+ "vftint_w_s", "vftintrz_wu_s", "vftint_wu_s", "vfclass_s"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], [llvm_v4f32_ty], -+ [IntrNoMem]>; -+foreach inst = ["vftintrne_l_d", "vftintrz_l_d", "vftintrp_l_d", "vftintrm_l_d", -+ "vftint_l_d", "vftintrz_lu_d", "vftint_lu_d", "vfclass_d"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], [llvm_v2f64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vftintrnel_l_s", "vftintrneh_l_s", "vftintrzl_l_s", -+ "vftintrzh_l_s", "vftintrpl_l_s", "vftintrph_l_s", -+ "vftintrml_l_s", "vftintrmh_l_s", "vftintl_l_s", -+ "vftinth_l_s"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], [llvm_v4f32_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vffint_s_w", "vffint_s_wu"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], [llvm_v4i32_ty], -+ [IntrNoMem]>; -+foreach inst = ["vffint_d_l", "vffint_d_lu"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v2f64_ty], [llvm_v2i64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vffintl_d_w", "vffinth_d_w"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v2f64_ty], [llvm_v4i32_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vffint_s_l"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], -+ [llvm_v2i64_ty, llvm_v2i64_ty], -+ [IntrNoMem]>; -+foreach inst = ["vftintrne_w_d", "vftintrz_w_d", "vftintrp_w_d", "vftintrm_w_d", -+ "vftint_w_d"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], -+ [llvm_v2f64_ty, llvm_v2f64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vfcvt_h_s"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], -+ [llvm_v4f32_ty, llvm_v4f32_ty], -+ 
[IntrNoMem]>; -+foreach inst = ["vfcvt_s_d"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], -+ [llvm_v2f64_ty, llvm_v2f64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vfcmp_caf_s", "vfcmp_cun_s", "vfcmp_ceq_s", "vfcmp_cueq_s", -+ "vfcmp_clt_s", "vfcmp_cult_s", "vfcmp_cle_s", "vfcmp_cule_s", -+ "vfcmp_cne_s", "vfcmp_cor_s", "vfcmp_cune_s", -+ "vfcmp_saf_s", "vfcmp_sun_s", "vfcmp_seq_s", "vfcmp_sueq_s", -+ "vfcmp_slt_s", "vfcmp_sult_s", "vfcmp_sle_s", "vfcmp_sule_s", -+ "vfcmp_sne_s", "vfcmp_sor_s", "vfcmp_sune_s"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], -+ [llvm_v4f32_ty, llvm_v4f32_ty], -+ [IntrNoMem]>; -+foreach inst = ["vfcmp_caf_d", "vfcmp_cun_d", "vfcmp_ceq_d", "vfcmp_cueq_d", -+ "vfcmp_clt_d", "vfcmp_cult_d", "vfcmp_cle_d", "vfcmp_cule_d", -+ "vfcmp_cne_d", "vfcmp_cor_d", "vfcmp_cune_d", -+ "vfcmp_saf_d", "vfcmp_sun_d", "vfcmp_seq_d", "vfcmp_sueq_d", -+ "vfcmp_slt_d", "vfcmp_sult_d", "vfcmp_sle_d", "vfcmp_sule_d", -+ "vfcmp_sne_d", "vfcmp_sor_d", "vfcmp_sune_d"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], -+ [llvm_v2f64_ty, llvm_v2f64_ty], -+ [IntrNoMem]>; -+ -+// LSX load/store -+def int_loongarch_lsx_vld -+ : VecInt<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty], -+ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; -+def int_loongarch_lsx_vldx -+ : VecInt<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i64_ty], -+ [IntrReadMem, IntrArgMemOnly]>; -+def int_loongarch_lsx_vldrepl_b -+ : VecInt<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty], -+ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; -+def int_loongarch_lsx_vldrepl_h -+ : VecInt<[llvm_v8i16_ty], [llvm_ptr_ty, llvm_i32_ty], -+ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; -+def int_loongarch_lsx_vldrepl_w -+ : VecInt<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty], -+ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; -+def int_loongarch_lsx_vldrepl_d -+ : VecInt<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty], -+ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; -+ -+def int_loongarch_lsx_vst -+ : VecInt<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty], -+ [IntrWriteMem, IntrArgMemOnly, ImmArg>]>; -+def int_loongarch_lsx_vstx -+ : VecInt<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i64_ty], -+ [IntrWriteMem, IntrArgMemOnly]>; -+def int_loongarch_lsx_vstelm_b -+ : VecInt<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], -+ [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; -+def int_loongarch_lsx_vstelm_h -+ : VecInt<[], [llvm_v8i16_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], -+ [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; -+def int_loongarch_lsx_vstelm_w -+ : VecInt<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], -+ [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; -+def int_loongarch_lsx_vstelm_d -+ : VecInt<[], [llvm_v2i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], -+ [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; -+ -+} // TargetPrefix = "loongarch" -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp -index ae7167cb5ce7..f55184019988 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp -@@ -15,6 +15,7 @@ - #include "MCTargetDesc/LoongArchMCTargetDesc.h" - #include "MCTargetDesc/LoongArchMatInt.h" - #include "llvm/Support/KnownBits.h" -+#include "llvm/Support/raw_ostream.h" - - using namespace llvm; - -@@ -75,7 +76,14 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) { - ReplaceNode(Node, CurDAG->getMachineNode(ADDIOp, DL, VT, TFI, Imm)); - return; - } -- // TODO: Add selection 
nodes needed later. -+ case ISD::BITCAST: { -+ if (VT.is128BitVector() || VT.is512BitVector()) { -+ ReplaceUses(SDValue(Node, 0), Node->getOperand(0)); -+ CurDAG->RemoveDeadNode(Node); -+ return; -+ } -+ break; -+ } - } - - // Select the default instruction. -@@ -262,6 +270,96 @@ bool LoongArchDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) { - return false; - } - -+bool LoongArchDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm, -+ unsigned MinSizeInBits) const { -+ if (!Subtarget->hasExtLSX()) -+ return false; -+ -+ BuildVectorSDNode *Node = dyn_cast(N); -+ -+ if (!Node) -+ return false; -+ -+ APInt SplatValue, SplatUndef; -+ unsigned SplatBitSize; -+ bool HasAnyUndefs; -+ -+ if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, -+ MinSizeInBits, /*IsBigEndian=*/false)) -+ return false; -+ -+ Imm = SplatValue; -+ -+ return true; -+} -+ -+template -+bool LoongArchDAGToDAGISel::selectVSplatImm(SDValue N, SDValue &SplatVal) { -+ APInt ImmValue; -+ EVT EltTy = N->getValueType(0).getVectorElementType(); -+ -+ if (N->getOpcode() == ISD::BITCAST) -+ N = N->getOperand(0); -+ -+ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && -+ ImmValue.getBitWidth() == EltTy.getSizeInBits()) { -+ if (IsSigned && ImmValue.isSignedIntN(ImmBitSize)) { -+ SplatVal = CurDAG->getTargetConstant(ImmValue.getSExtValue(), SDLoc(N), -+ Subtarget->getGRLenVT()); -+ return true; -+ } -+ if (!IsSigned && ImmValue.isIntN(ImmBitSize)) { -+ SplatVal = CurDAG->getTargetConstant(ImmValue.getZExtValue(), SDLoc(N), -+ Subtarget->getGRLenVT()); -+ return true; -+ } -+ } -+ -+ return false; -+} -+ -+bool LoongArchDAGToDAGISel::selectVSplatUimmInvPow2(SDValue N, -+ SDValue &SplatImm) const { -+ APInt ImmValue; -+ EVT EltTy = N->getValueType(0).getVectorElementType(); -+ -+ if (N->getOpcode() == ISD::BITCAST) -+ N = N->getOperand(0); -+ -+ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && -+ ImmValue.getBitWidth() == EltTy.getSizeInBits()) { -+ int32_t Log2 = (~ImmValue).exactLogBase2(); -+ -+ if (Log2 != -1) { -+ SplatImm = CurDAG->getTargetConstant(Log2, SDLoc(N), EltTy); -+ return true; -+ } -+ } -+ -+ return false; -+} -+ -+bool LoongArchDAGToDAGISel::selectVSplatUimmPow2(SDValue N, -+ SDValue &SplatImm) const { -+ APInt ImmValue; -+ EVT EltTy = N->getValueType(0).getVectorElementType(); -+ -+ if (N->getOpcode() == ISD::BITCAST) -+ N = N->getOperand(0); -+ -+ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && -+ ImmValue.getBitWidth() == EltTy.getSizeInBits()) { -+ int32_t Log2 = ImmValue.exactLogBase2(); -+ -+ if (Log2 != -1) { -+ SplatImm = CurDAG->getTargetConstant(Log2, SDLoc(N), EltTy); -+ return true; -+ } -+ } -+ -+ return false; -+} -+ - // This pass converts a legalized DAG into a LoongArch-specific DAG, ready - // for instruction scheduling. 
- FunctionPass *llvm::createLoongArchISelDag(LoongArchTargetMachine &TM) { -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h -index 3099407aea3e..5e3d6ccc3755 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h -+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h -@@ -56,6 +56,14 @@ public: - bool selectSExti32(SDValue N, SDValue &Val); - bool selectZExti32(SDValue N, SDValue &Val); - -+ bool selectVSplat(SDNode *N, APInt &Imm, unsigned MinSizeInBits) const; -+ -+ template -+ bool selectVSplatImm(SDValue N, SDValue &SplatVal); -+ -+ bool selectVSplatUimmInvPow2(SDValue N, SDValue &SplatImm) const; -+ bool selectVSplatUimmPow2(SDValue N, SDValue &SplatImm) const; -+ - // Include the pieces autogenerated from the target description. - #include "LoongArchGenDAGISel.inc" - }; -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index db5961fc501a..c05133647929 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -62,6 +62,13 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - MVT::v4i64}) - addRegisterClass(VT, &LoongArch::LASX256RegClass); - -+ static const MVT::SimpleValueType LSXVTs[] = { -+ MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64}; -+ -+ if (Subtarget.hasExtLSX()) -+ for (MVT VT : LSXVTs) -+ addRegisterClass(VT, &LoongArch::LSX128RegClass); -+ - setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT, - MVT::i1, Promote); - -@@ -109,6 +116,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); - setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom); - setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom); -+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom); - if (Subtarget.hasBasicF() && !Subtarget.hasBasicD()) - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); - if (Subtarget.hasBasicF()) -@@ -138,6 +146,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); - setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom); -+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); - } - - static const ISD::CondCode FPCCToExpand[] = { -@@ -194,6 +203,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction(ISD::UINT_TO_FP, GRLenVT, Custom); - } - -+ if (Subtarget.hasExtLSX()) -+ setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, -+ {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8}, Legal); -+ - // Compute derived properties from the register classes. 
- computeRegisterProperties(Subtarget.getRegisterInfo()); - -@@ -215,6 +228,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setTargetDAGCombine(ISD::AND); - setTargetDAGCombine(ISD::OR); - setTargetDAGCombine(ISD::SRL); -+ if (Subtarget.hasExtLSX()) -+ setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); - } - - bool LoongArchTargetLowering::isOffsetFoldingLegal( -@@ -652,9 +667,24 @@ LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op, - return Addr; - } - -+template -+static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, -+ SelectionDAG &DAG, bool IsSigned = false) { -+ auto *CImm = cast(Op->getOperand(ImmOp)); -+ // Check the ImmArg. -+ if ((IsSigned && !isInt(CImm->getSExtValue())) || -+ (!IsSigned && !isUInt(CImm->getZExtValue()))) { -+ DAG.getContext()->emitError(Op->getOperationName(0) + -+ ": argument out of range."); -+ return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType()); -+ } -+ return SDValue(); -+} -+ - SDValue - LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, - SelectionDAG &DAG) const { -+ SDLoc DL(Op); - switch (Op.getConstantOperandVal(0)) { - default: - return SDValue(); // Don't custom lower most intrinsics. -@@ -662,6 +692,141 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - return DAG.getRegister(LoongArch::R2, PtrVT); - } -+ case Intrinsic::loongarch_lsx_vpickve2gr_d: -+ case Intrinsic::loongarch_lsx_vpickve2gr_du: -+ case Intrinsic::loongarch_lsx_vreplvei_d: -+ return checkIntrinsicImmArg<1>(Op, 2, DAG); -+ case Intrinsic::loongarch_lsx_vreplvei_w: -+ return checkIntrinsicImmArg<2>(Op, 2, DAG); -+ case Intrinsic::loongarch_lsx_vsat_b: -+ case Intrinsic::loongarch_lsx_vsat_bu: -+ case Intrinsic::loongarch_lsx_vrotri_b: -+ case Intrinsic::loongarch_lsx_vsllwil_h_b: -+ case Intrinsic::loongarch_lsx_vsllwil_hu_bu: -+ case Intrinsic::loongarch_lsx_vsrlri_b: -+ case Intrinsic::loongarch_lsx_vsrari_b: -+ case Intrinsic::loongarch_lsx_vreplvei_h: -+ return checkIntrinsicImmArg<3>(Op, 2, DAG); -+ case Intrinsic::loongarch_lsx_vsat_h: -+ case Intrinsic::loongarch_lsx_vsat_hu: -+ case Intrinsic::loongarch_lsx_vrotri_h: -+ case Intrinsic::loongarch_lsx_vsllwil_w_h: -+ case Intrinsic::loongarch_lsx_vsllwil_wu_hu: -+ case Intrinsic::loongarch_lsx_vsrlri_h: -+ case Intrinsic::loongarch_lsx_vsrari_h: -+ case Intrinsic::loongarch_lsx_vreplvei_b: -+ return checkIntrinsicImmArg<4>(Op, 2, DAG); -+ case Intrinsic::loongarch_lsx_vsrlni_b_h: -+ case Intrinsic::loongarch_lsx_vsrani_b_h: -+ case Intrinsic::loongarch_lsx_vsrlrni_b_h: -+ case Intrinsic::loongarch_lsx_vsrarni_b_h: -+ case Intrinsic::loongarch_lsx_vssrlni_b_h: -+ case Intrinsic::loongarch_lsx_vssrani_b_h: -+ case Intrinsic::loongarch_lsx_vssrlni_bu_h: -+ case Intrinsic::loongarch_lsx_vssrani_bu_h: -+ case Intrinsic::loongarch_lsx_vssrlrni_b_h: -+ case Intrinsic::loongarch_lsx_vssrarni_b_h: -+ case Intrinsic::loongarch_lsx_vssrlrni_bu_h: -+ case Intrinsic::loongarch_lsx_vssrarni_bu_h: -+ return checkIntrinsicImmArg<4>(Op, 3, DAG); -+ case Intrinsic::loongarch_lsx_vsat_w: -+ case Intrinsic::loongarch_lsx_vsat_wu: -+ case Intrinsic::loongarch_lsx_vrotri_w: -+ case Intrinsic::loongarch_lsx_vsllwil_d_w: -+ case Intrinsic::loongarch_lsx_vsllwil_du_wu: -+ case Intrinsic::loongarch_lsx_vsrlri_w: -+ case Intrinsic::loongarch_lsx_vsrari_w: -+ case Intrinsic::loongarch_lsx_vslei_bu: -+ case Intrinsic::loongarch_lsx_vslei_hu: -+ case Intrinsic::loongarch_lsx_vslei_wu: -+ case Intrinsic::loongarch_lsx_vslei_du: -+ 
case Intrinsic::loongarch_lsx_vslti_bu: -+ case Intrinsic::loongarch_lsx_vslti_hu: -+ case Intrinsic::loongarch_lsx_vslti_wu: -+ case Intrinsic::loongarch_lsx_vslti_du: -+ case Intrinsic::loongarch_lsx_vbsll_v: -+ case Intrinsic::loongarch_lsx_vbsrl_v: -+ return checkIntrinsicImmArg<5>(Op, 2, DAG); -+ case Intrinsic::loongarch_lsx_vseqi_b: -+ case Intrinsic::loongarch_lsx_vseqi_h: -+ case Intrinsic::loongarch_lsx_vseqi_w: -+ case Intrinsic::loongarch_lsx_vseqi_d: -+ case Intrinsic::loongarch_lsx_vslei_b: -+ case Intrinsic::loongarch_lsx_vslei_h: -+ case Intrinsic::loongarch_lsx_vslei_w: -+ case Intrinsic::loongarch_lsx_vslei_d: -+ case Intrinsic::loongarch_lsx_vslti_b: -+ case Intrinsic::loongarch_lsx_vslti_h: -+ case Intrinsic::loongarch_lsx_vslti_w: -+ case Intrinsic::loongarch_lsx_vslti_d: -+ return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true); -+ case Intrinsic::loongarch_lsx_vsrlni_h_w: -+ case Intrinsic::loongarch_lsx_vsrani_h_w: -+ case Intrinsic::loongarch_lsx_vsrlrni_h_w: -+ case Intrinsic::loongarch_lsx_vsrarni_h_w: -+ case Intrinsic::loongarch_lsx_vssrlni_h_w: -+ case Intrinsic::loongarch_lsx_vssrani_h_w: -+ case Intrinsic::loongarch_lsx_vssrlni_hu_w: -+ case Intrinsic::loongarch_lsx_vssrani_hu_w: -+ case Intrinsic::loongarch_lsx_vssrlrni_h_w: -+ case Intrinsic::loongarch_lsx_vssrarni_h_w: -+ case Intrinsic::loongarch_lsx_vssrlrni_hu_w: -+ case Intrinsic::loongarch_lsx_vssrarni_hu_w: -+ case Intrinsic::loongarch_lsx_vfrstpi_b: -+ case Intrinsic::loongarch_lsx_vfrstpi_h: -+ return checkIntrinsicImmArg<5>(Op, 3, DAG); -+ case Intrinsic::loongarch_lsx_vsat_d: -+ case Intrinsic::loongarch_lsx_vsat_du: -+ case Intrinsic::loongarch_lsx_vrotri_d: -+ case Intrinsic::loongarch_lsx_vsrlri_d: -+ case Intrinsic::loongarch_lsx_vsrari_d: -+ return checkIntrinsicImmArg<6>(Op, 2, DAG); -+ case Intrinsic::loongarch_lsx_vsrlni_w_d: -+ case Intrinsic::loongarch_lsx_vsrani_w_d: -+ case Intrinsic::loongarch_lsx_vsrlrni_w_d: -+ case Intrinsic::loongarch_lsx_vsrarni_w_d: -+ case Intrinsic::loongarch_lsx_vssrlni_w_d: -+ case Intrinsic::loongarch_lsx_vssrani_w_d: -+ case Intrinsic::loongarch_lsx_vssrlni_wu_d: -+ case Intrinsic::loongarch_lsx_vssrani_wu_d: -+ case Intrinsic::loongarch_lsx_vssrlrni_w_d: -+ case Intrinsic::loongarch_lsx_vssrarni_w_d: -+ case Intrinsic::loongarch_lsx_vssrlrni_wu_d: -+ case Intrinsic::loongarch_lsx_vssrarni_wu_d: -+ return checkIntrinsicImmArg<6>(Op, 3, DAG); -+ case Intrinsic::loongarch_lsx_vsrlni_d_q: -+ case Intrinsic::loongarch_lsx_vsrani_d_q: -+ case Intrinsic::loongarch_lsx_vsrlrni_d_q: -+ case Intrinsic::loongarch_lsx_vsrarni_d_q: -+ case Intrinsic::loongarch_lsx_vssrlni_d_q: -+ case Intrinsic::loongarch_lsx_vssrani_d_q: -+ case Intrinsic::loongarch_lsx_vssrlni_du_q: -+ case Intrinsic::loongarch_lsx_vssrani_du_q: -+ case Intrinsic::loongarch_lsx_vssrlrni_d_q: -+ case Intrinsic::loongarch_lsx_vssrarni_d_q: -+ case Intrinsic::loongarch_lsx_vssrlrni_du_q: -+ case Intrinsic::loongarch_lsx_vssrarni_du_q: -+ return checkIntrinsicImmArg<7>(Op, 3, DAG); -+ case Intrinsic::loongarch_lsx_vnori_b: -+ case Intrinsic::loongarch_lsx_vshuf4i_b: -+ case Intrinsic::loongarch_lsx_vshuf4i_h: -+ case Intrinsic::loongarch_lsx_vshuf4i_w: -+ return checkIntrinsicImmArg<8>(Op, 2, DAG); -+ case Intrinsic::loongarch_lsx_vshuf4i_d: -+ case Intrinsic::loongarch_lsx_vpermi_w: -+ case Intrinsic::loongarch_lsx_vbitseli_b: -+ case Intrinsic::loongarch_lsx_vextrins_b: -+ case Intrinsic::loongarch_lsx_vextrins_h: -+ case Intrinsic::loongarch_lsx_vextrins_w: -+ case 
Intrinsic::loongarch_lsx_vextrins_d: -+ return checkIntrinsicImmArg<8>(Op, 3, DAG); -+ case Intrinsic::loongarch_lsx_vrepli_b: -+ case Intrinsic::loongarch_lsx_vrepli_h: -+ case Intrinsic::loongarch_lsx_vrepli_w: -+ case Intrinsic::loongarch_lsx_vrepli_d: -+ return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true); -+ case Intrinsic::loongarch_lsx_vldi: -+ return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true); - } - } - -@@ -757,6 +922,29 @@ LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, - : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other}, - {Chain, DAG.getConstant(Imm, DL, GRLenVT)}); - } -+ case Intrinsic::loongarch_lsx_vld: -+ case Intrinsic::loongarch_lsx_vldrepl_b: -+ return !isInt<12>(cast(Op.getOperand(3))->getSExtValue()) -+ ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) -+ : SDValue(); -+ case Intrinsic::loongarch_lsx_vldrepl_h: -+ return !isShiftedInt<11, 1>( -+ cast(Op.getOperand(3))->getSExtValue()) -+ ? emitIntrinsicWithChainErrorMessage( -+ Op, "argument out of range or not a multiple of 2", DAG) -+ : SDValue(); -+ case Intrinsic::loongarch_lsx_vldrepl_w: -+ return !isShiftedInt<10, 2>( -+ cast(Op.getOperand(3))->getSExtValue()) -+ ? emitIntrinsicWithChainErrorMessage( -+ Op, "argument out of range or not a multiple of 4", DAG) -+ : SDValue(); -+ case Intrinsic::loongarch_lsx_vldrepl_d: -+ return !isShiftedInt<9, 3>( -+ cast(Op.getOperand(3))->getSExtValue()) -+ ? emitIntrinsicWithChainErrorMessage( -+ Op, "argument out of range or not a multiple of 8", DAG) -+ : SDValue(); - } - } - -@@ -875,6 +1063,36 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, - : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) - : Op; - } -+ case Intrinsic::loongarch_lsx_vst: -+ return !isInt<12>(cast(Op.getOperand(4))->getSExtValue()) -+ ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) -+ : SDValue(); -+ case Intrinsic::loongarch_lsx_vstelm_b: -+ return (!isInt<8>(cast(Op.getOperand(4))->getSExtValue()) || -+ !isUInt<4>(cast(Op.getOperand(5))->getZExtValue())) -+ ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) -+ : SDValue(); -+ case Intrinsic::loongarch_lsx_vstelm_h: -+ return (!isShiftedInt<8, 1>( -+ cast(Op.getOperand(4))->getSExtValue()) || -+ !isUInt<3>(cast(Op.getOperand(5))->getZExtValue())) -+ ? emitIntrinsicErrorMessage( -+ Op, "argument out of range or not a multiple of 2", DAG) -+ : SDValue(); -+ case Intrinsic::loongarch_lsx_vstelm_w: -+ return (!isShiftedInt<8, 2>( -+ cast(Op.getOperand(4))->getSExtValue()) || -+ !isUInt<2>(cast(Op.getOperand(5))->getZExtValue())) -+ ? emitIntrinsicErrorMessage( -+ Op, "argument out of range or not a multiple of 4", DAG) -+ : SDValue(); -+ case Intrinsic::loongarch_lsx_vstelm_d: -+ return (!isShiftedInt<8, 3>( -+ cast(Op.getOperand(4))->getSExtValue()) || -+ !isUInt<1>(cast(Op.getOperand(5))->getZExtValue())) -+ ? emitIntrinsicErrorMessage( -+ Op, "argument out of range or not a multiple of 8", DAG) -+ : SDValue(); - } - } - -@@ -1026,16 +1244,110 @@ static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, - return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes); - } - --// Helper function that emits error message for intrinsics with chain and return --// a UNDEF and the chain as the results. --static void emitErrorAndReplaceIntrinsicWithChainResults( -+// Helper function that emits error message for intrinsics with/without chain -+// and return a UNDEF or and the chain as the results. 
-+static void emitErrorAndReplaceIntrinsicResults( - SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG, -- StringRef ErrorMsg) { -+ StringRef ErrorMsg, bool WithChain = true) { - DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + "."); - Results.push_back(DAG.getUNDEF(N->getValueType(0))); -+ if (!WithChain) -+ return; - Results.push_back(N->getOperand(0)); - } - -+template -+static void -+replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl &Results, -+ SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, -+ unsigned ResOp) { -+ const StringRef ErrorMsgOOR = "argument out of range"; -+ unsigned Imm = cast(Node->getOperand(2))->getZExtValue(); -+ if (!isUInt(Imm)) { -+ emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR, -+ /*WithChain=*/false); -+ return; -+ } -+ SDLoc DL(Node); -+ SDValue Vec = Node->getOperand(1); -+ -+ SDValue PickElt = -+ DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec, -+ DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()), -+ DAG.getValueType(Vec.getValueType().getVectorElementType())); -+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0), -+ PickElt.getValue(0))); -+} -+ -+static void replaceVecCondBranchResults(SDNode *N, -+ SmallVectorImpl &Results, -+ SelectionDAG &DAG, -+ const LoongArchSubtarget &Subtarget, -+ unsigned ResOp) { -+ SDLoc DL(N); -+ SDValue Vec = N->getOperand(1); -+ -+ SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec); -+ Results.push_back( -+ DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0))); -+} -+ -+static void -+replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl &Results, -+ SelectionDAG &DAG, -+ const LoongArchSubtarget &Subtarget) { -+ switch (N->getConstantOperandVal(0)) { -+ default: -+ llvm_unreachable("Unexpected Intrinsic."); -+ case Intrinsic::loongarch_lsx_vpickve2gr_b: -+ replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget, -+ LoongArchISD::VPICK_SEXT_ELT); -+ break; -+ case Intrinsic::loongarch_lsx_vpickve2gr_h: -+ replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget, -+ LoongArchISD::VPICK_SEXT_ELT); -+ break; -+ case Intrinsic::loongarch_lsx_vpickve2gr_w: -+ replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget, -+ LoongArchISD::VPICK_SEXT_ELT); -+ break; -+ case Intrinsic::loongarch_lsx_vpickve2gr_bu: -+ replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget, -+ LoongArchISD::VPICK_ZEXT_ELT); -+ break; -+ case Intrinsic::loongarch_lsx_vpickve2gr_hu: -+ replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget, -+ LoongArchISD::VPICK_ZEXT_ELT); -+ break; -+ case Intrinsic::loongarch_lsx_vpickve2gr_wu: -+ replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget, -+ LoongArchISD::VPICK_ZEXT_ELT); -+ break; -+ case Intrinsic::loongarch_lsx_bz_b: -+ case Intrinsic::loongarch_lsx_bz_h: -+ case Intrinsic::loongarch_lsx_bz_w: -+ case Intrinsic::loongarch_lsx_bz_d: -+ replaceVecCondBranchResults(N, Results, DAG, Subtarget, -+ LoongArchISD::VALL_ZERO); -+ break; -+ case Intrinsic::loongarch_lsx_bz_v: -+ replaceVecCondBranchResults(N, Results, DAG, Subtarget, -+ LoongArchISD::VANY_ZERO); -+ break; -+ case Intrinsic::loongarch_lsx_bnz_b: -+ case Intrinsic::loongarch_lsx_bnz_h: -+ case Intrinsic::loongarch_lsx_bnz_w: -+ case Intrinsic::loongarch_lsx_bnz_d: -+ replaceVecCondBranchResults(N, Results, DAG, Subtarget, -+ LoongArchISD::VALL_NONZERO); -+ break; -+ case Intrinsic::loongarch_lsx_bnz_v: -+ replaceVecCondBranchResults(N, Results, DAG, Subtarget, -+ LoongArchISD::VANY_NONZERO); -+ break; -+ } -+} -+ - void 
LoongArchTargetLowering::ReplaceNodeResults( - SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { - SDLoc DL(N); -@@ -1168,14 +1480,12 @@ void LoongArchTargetLowering::ReplaceNodeResults( - llvm_unreachable("Unexpected Intrinsic."); - case Intrinsic::loongarch_movfcsr2gr: { - if (!Subtarget.hasBasicF()) { -- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, -- ErrorMsgReqF); -+ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF); - return; - } - unsigned Imm = cast(Op2)->getZExtValue(); - if (!isUInt<2>(Imm)) { -- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, -- ErrorMsgOOR); -+ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); - return; - } - SDValue MOVFCSR2GRResults = DAG.getNode( -@@ -1211,7 +1521,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( - {Chain, Op2, \ - DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \ - Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \ -- Results.push_back(NODE.getValue(1)); \ -+ Results.push_back(NODE.getValue(1)); \ - break; \ - } - CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W) -@@ -1220,8 +1530,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( - #define CSR_CASE(ID) \ - case Intrinsic::loongarch_##ID: { \ - if (!Subtarget.is64Bit()) \ -- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, \ -- ErrorMsgReqLA64); \ -+ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \ - break; \ - } - CSR_CASE(csrrd_d); -@@ -1232,8 +1541,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( - case Intrinsic::loongarch_csrrd_w: { - unsigned Imm = cast(Op2)->getZExtValue(); - if (!isUInt<14>(Imm)) { -- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, -- ErrorMsgOOR); -+ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); - return; - } - SDValue CSRRDResults = -@@ -1247,8 +1555,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( - case Intrinsic::loongarch_csrwr_w: { - unsigned Imm = cast(N->getOperand(3))->getZExtValue(); - if (!isUInt<14>(Imm)) { -- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, -- ErrorMsgOOR); -+ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); - return; - } - SDValue CSRWRResults = -@@ -1263,8 +1570,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( - case Intrinsic::loongarch_csrxchg_w: { - unsigned Imm = cast(N->getOperand(4))->getZExtValue(); - if (!isUInt<14>(Imm)) { -- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, -- ErrorMsgOOR); -+ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); - return; - } - SDValue CSRXCHGResults = DAG.getNode( -@@ -1302,8 +1608,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( - } - case Intrinsic::loongarch_lddir_d: { - if (!Subtarget.is64Bit()) { -- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, -- ErrorMsgReqLA64); -+ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); - return; - } - break; -@@ -1322,6 +1627,10 @@ void LoongArchTargetLowering::ReplaceNodeResults( - Results.push_back(N->getOperand(0)); - break; - } -+ case ISD::INTRINSIC_WO_CHAIN: { -+ replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget); -+ break; -+ } - } - } - -@@ -1685,6 +1994,440 @@ static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, - Src.getOperand(0)); - } - -+template -+static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, -+ SelectionDAG &DAG, -+ const LoongArchSubtarget &Subtarget, -+ bool IsSigned = false) { -+ 
SDLoc DL(Node); -+ auto *CImm = cast(Node->getOperand(ImmOp)); -+ // Check the ImmArg. -+ if ((IsSigned && !isInt(CImm->getSExtValue())) || -+ (!IsSigned && !isUInt(CImm->getZExtValue()))) { -+ DAG.getContext()->emitError(Node->getOperationName(0) + -+ ": argument out of range."); -+ return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT()); -+ } -+ return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT()); -+} -+ -+template -+static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, -+ SelectionDAG &DAG, bool IsSigned = false) { -+ SDLoc DL(Node); -+ EVT ResTy = Node->getValueType(0); -+ auto *CImm = cast(Node->getOperand(ImmOp)); -+ -+ // Check the ImmArg. -+ if ((IsSigned && !isInt(CImm->getSExtValue())) || -+ (!IsSigned && !isUInt(CImm->getZExtValue()))) { -+ DAG.getContext()->emitError(Node->getOperationName(0) + -+ ": argument out of range."); -+ return DAG.getNode(ISD::UNDEF, DL, ResTy); -+ } -+ return DAG.getConstant( -+ APInt(ResTy.getScalarType().getSizeInBits(), -+ IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned), -+ DL, ResTy); -+} -+ -+static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) { -+ SDLoc DL(Node); -+ EVT ResTy = Node->getValueType(0); -+ SDValue Vec = Node->getOperand(2); -+ SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy); -+ return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask); -+} -+ -+static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) { -+ SDLoc DL(Node); -+ EVT ResTy = Node->getValueType(0); -+ SDValue One = DAG.getConstant(1, DL, ResTy); -+ SDValue Bit = -+ DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG)); -+ -+ return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), -+ DAG.getNOT(DL, Bit, ResTy)); -+} -+ -+template -+static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) { -+ SDLoc DL(Node); -+ EVT ResTy = Node->getValueType(0); -+ auto *CImm = cast(Node->getOperand(2)); -+ // Check the unsigned ImmArg. -+ if (!isUInt(CImm->getZExtValue())) { -+ DAG.getContext()->emitError(Node->getOperationName(0) + -+ ": argument out of range."); -+ return DAG.getNode(ISD::UNDEF, DL, ResTy); -+ } -+ -+ APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); -+ SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy); -+ -+ return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask); -+} -+ -+template -+static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) { -+ SDLoc DL(Node); -+ EVT ResTy = Node->getValueType(0); -+ auto *CImm = cast(Node->getOperand(2)); -+ // Check the unsigned ImmArg. -+ if (!isUInt(CImm->getZExtValue())) { -+ DAG.getContext()->emitError(Node->getOperationName(0) + -+ ": argument out of range."); -+ return DAG.getNode(ISD::UNDEF, DL, ResTy); -+ } -+ -+ APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); -+ SDValue BitImm = DAG.getConstant(Imm, DL, ResTy); -+ return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm); -+} -+ -+template -+static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) { -+ SDLoc DL(Node); -+ EVT ResTy = Node->getValueType(0); -+ auto *CImm = cast(Node->getOperand(2)); -+ // Check the unsigned ImmArg. 
-+ if (!isUInt(CImm->getZExtValue())) { -+ DAG.getContext()->emitError(Node->getOperationName(0) + -+ ": argument out of range."); -+ return DAG.getNode(ISD::UNDEF, DL, ResTy); -+ } -+ -+ APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); -+ SDValue BitImm = DAG.getConstant(Imm, DL, ResTy); -+ return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm); -+} -+ -+static SDValue -+performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, -+ TargetLowering::DAGCombinerInfo &DCI, -+ const LoongArchSubtarget &Subtarget) { -+ SDLoc DL(N); -+ switch (N->getConstantOperandVal(0)) { -+ default: -+ break; -+ case Intrinsic::loongarch_lsx_vadd_b: -+ case Intrinsic::loongarch_lsx_vadd_h: -+ case Intrinsic::loongarch_lsx_vadd_w: -+ case Intrinsic::loongarch_lsx_vadd_d: -+ return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vaddi_bu: -+ case Intrinsic::loongarch_lsx_vaddi_hu: -+ case Intrinsic::loongarch_lsx_vaddi_wu: -+ case Intrinsic::loongarch_lsx_vaddi_du: -+ return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<5>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vsub_b: -+ case Intrinsic::loongarch_lsx_vsub_h: -+ case Intrinsic::loongarch_lsx_vsub_w: -+ case Intrinsic::loongarch_lsx_vsub_d: -+ return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vsubi_bu: -+ case Intrinsic::loongarch_lsx_vsubi_hu: -+ case Intrinsic::loongarch_lsx_vsubi_wu: -+ case Intrinsic::loongarch_lsx_vsubi_du: -+ return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<5>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vneg_b: -+ case Intrinsic::loongarch_lsx_vneg_h: -+ case Intrinsic::loongarch_lsx_vneg_w: -+ case Intrinsic::loongarch_lsx_vneg_d: -+ return DAG.getNode( -+ ISD::SUB, DL, N->getValueType(0), -+ DAG.getConstant( -+ APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0, -+ /*isSigned=*/true), -+ SDLoc(N), N->getValueType(0)), -+ N->getOperand(1)); -+ case Intrinsic::loongarch_lsx_vmax_b: -+ case Intrinsic::loongarch_lsx_vmax_h: -+ case Intrinsic::loongarch_lsx_vmax_w: -+ case Intrinsic::loongarch_lsx_vmax_d: -+ return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vmax_bu: -+ case Intrinsic::loongarch_lsx_vmax_hu: -+ case Intrinsic::loongarch_lsx_vmax_wu: -+ case Intrinsic::loongarch_lsx_vmax_du: -+ return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vmaxi_b: -+ case Intrinsic::loongarch_lsx_vmaxi_h: -+ case Intrinsic::loongarch_lsx_vmaxi_w: -+ case Intrinsic::loongarch_lsx_vmaxi_d: -+ return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true)); -+ case Intrinsic::loongarch_lsx_vmaxi_bu: -+ case Intrinsic::loongarch_lsx_vmaxi_hu: -+ case Intrinsic::loongarch_lsx_vmaxi_wu: -+ case Intrinsic::loongarch_lsx_vmaxi_du: -+ return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<5>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vmin_b: -+ case Intrinsic::loongarch_lsx_vmin_h: -+ case Intrinsic::loongarch_lsx_vmin_w: -+ case Intrinsic::loongarch_lsx_vmin_d: -+ return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vmin_bu: -+ case 
Intrinsic::loongarch_lsx_vmin_hu: -+ case Intrinsic::loongarch_lsx_vmin_wu: -+ case Intrinsic::loongarch_lsx_vmin_du: -+ return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vmini_b: -+ case Intrinsic::loongarch_lsx_vmini_h: -+ case Intrinsic::loongarch_lsx_vmini_w: -+ case Intrinsic::loongarch_lsx_vmini_d: -+ return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true)); -+ case Intrinsic::loongarch_lsx_vmini_bu: -+ case Intrinsic::loongarch_lsx_vmini_hu: -+ case Intrinsic::loongarch_lsx_vmini_wu: -+ case Intrinsic::loongarch_lsx_vmini_du: -+ return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<5>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vmul_b: -+ case Intrinsic::loongarch_lsx_vmul_h: -+ case Intrinsic::loongarch_lsx_vmul_w: -+ case Intrinsic::loongarch_lsx_vmul_d: -+ return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vmadd_b: -+ case Intrinsic::loongarch_lsx_vmadd_h: -+ case Intrinsic::loongarch_lsx_vmadd_w: -+ case Intrinsic::loongarch_lsx_vmadd_d: { -+ EVT ResTy = N->getValueType(0); -+ return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1), -+ DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2), -+ N->getOperand(3))); -+ } -+ case Intrinsic::loongarch_lsx_vmsub_b: -+ case Intrinsic::loongarch_lsx_vmsub_h: -+ case Intrinsic::loongarch_lsx_vmsub_w: -+ case Intrinsic::loongarch_lsx_vmsub_d: { -+ EVT ResTy = N->getValueType(0); -+ return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1), -+ DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2), -+ N->getOperand(3))); -+ } -+ case Intrinsic::loongarch_lsx_vdiv_b: -+ case Intrinsic::loongarch_lsx_vdiv_h: -+ case Intrinsic::loongarch_lsx_vdiv_w: -+ case Intrinsic::loongarch_lsx_vdiv_d: -+ return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vdiv_bu: -+ case Intrinsic::loongarch_lsx_vdiv_hu: -+ case Intrinsic::loongarch_lsx_vdiv_wu: -+ case Intrinsic::loongarch_lsx_vdiv_du: -+ return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vmod_b: -+ case Intrinsic::loongarch_lsx_vmod_h: -+ case Intrinsic::loongarch_lsx_vmod_w: -+ case Intrinsic::loongarch_lsx_vmod_d: -+ return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vmod_bu: -+ case Intrinsic::loongarch_lsx_vmod_hu: -+ case Intrinsic::loongarch_lsx_vmod_wu: -+ case Intrinsic::loongarch_lsx_vmod_du: -+ return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vand_v: -+ return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vor_v: -+ return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vxor_v: -+ return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vnor_v: { -+ SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ return DAG.getNOT(DL, Res, Res->getValueType(0)); -+ } -+ case Intrinsic::loongarch_lsx_vandi_b: -+ return DAG.getNode(ISD::AND, DL, N->getValueType(0), 
N->getOperand(1), -+ lowerVectorSplatImm<8>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vori_b: -+ return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<8>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vxori_b: -+ return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<8>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vsll_b: -+ case Intrinsic::loongarch_lsx_vsll_h: -+ case Intrinsic::loongarch_lsx_vsll_w: -+ case Intrinsic::loongarch_lsx_vsll_d: -+ return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), -+ truncateVecElts(N, DAG)); -+ case Intrinsic::loongarch_lsx_vslli_b: -+ return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<3>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vslli_h: -+ return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<4>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vslli_w: -+ return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<5>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vslli_d: -+ return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<6>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vsrl_b: -+ case Intrinsic::loongarch_lsx_vsrl_h: -+ case Intrinsic::loongarch_lsx_vsrl_w: -+ case Intrinsic::loongarch_lsx_vsrl_d: -+ return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), -+ truncateVecElts(N, DAG)); -+ case Intrinsic::loongarch_lsx_vsrli_b: -+ return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<3>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vsrli_h: -+ return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<4>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vsrli_w: -+ return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<5>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vsrli_d: -+ return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<6>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vsra_b: -+ case Intrinsic::loongarch_lsx_vsra_h: -+ case Intrinsic::loongarch_lsx_vsra_w: -+ case Intrinsic::loongarch_lsx_vsra_d: -+ return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), -+ truncateVecElts(N, DAG)); -+ case Intrinsic::loongarch_lsx_vsrai_b: -+ return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<3>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vsrai_h: -+ return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<4>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vsrai_w: -+ return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<5>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vsrai_d: -+ return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<6>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vpcnt_b: -+ case Intrinsic::loongarch_lsx_vpcnt_h: -+ case Intrinsic::loongarch_lsx_vpcnt_w: -+ case Intrinsic::loongarch_lsx_vpcnt_d: -+ return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1)); -+ case Intrinsic::loongarch_lsx_vbitclr_b: -+ case Intrinsic::loongarch_lsx_vbitclr_h: -+ case Intrinsic::loongarch_lsx_vbitclr_w: -+ case Intrinsic::loongarch_lsx_vbitclr_d: -+ return lowerVectorBitClear(N, DAG); -+ case Intrinsic::loongarch_lsx_vbitclri_b: -+ return 
lowerVectorBitClearImm<3>(N, DAG); -+ case Intrinsic::loongarch_lsx_vbitclri_h: -+ return lowerVectorBitClearImm<4>(N, DAG); -+ case Intrinsic::loongarch_lsx_vbitclri_w: -+ return lowerVectorBitClearImm<5>(N, DAG); -+ case Intrinsic::loongarch_lsx_vbitclri_d: -+ return lowerVectorBitClearImm<6>(N, DAG); -+ case Intrinsic::loongarch_lsx_vbitset_b: -+ case Intrinsic::loongarch_lsx_vbitset_h: -+ case Intrinsic::loongarch_lsx_vbitset_w: -+ case Intrinsic::loongarch_lsx_vbitset_d: { -+ EVT VecTy = N->getValueType(0); -+ SDValue One = DAG.getConstant(1, DL, VecTy); -+ return DAG.getNode( -+ ISD::OR, DL, VecTy, N->getOperand(1), -+ DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG))); -+ } -+ case Intrinsic::loongarch_lsx_vbitseti_b: -+ return lowerVectorBitSetImm<3>(N, DAG); -+ case Intrinsic::loongarch_lsx_vbitseti_h: -+ return lowerVectorBitSetImm<4>(N, DAG); -+ case Intrinsic::loongarch_lsx_vbitseti_w: -+ return lowerVectorBitSetImm<5>(N, DAG); -+ case Intrinsic::loongarch_lsx_vbitseti_d: -+ return lowerVectorBitSetImm<6>(N, DAG); -+ case Intrinsic::loongarch_lsx_vbitrev_b: -+ case Intrinsic::loongarch_lsx_vbitrev_h: -+ case Intrinsic::loongarch_lsx_vbitrev_w: -+ case Intrinsic::loongarch_lsx_vbitrev_d: { -+ EVT VecTy = N->getValueType(0); -+ SDValue One = DAG.getConstant(1, DL, VecTy); -+ return DAG.getNode( -+ ISD::XOR, DL, VecTy, N->getOperand(1), -+ DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG))); -+ } -+ case Intrinsic::loongarch_lsx_vbitrevi_b: -+ return lowerVectorBitRevImm<3>(N, DAG); -+ case Intrinsic::loongarch_lsx_vbitrevi_h: -+ return lowerVectorBitRevImm<4>(N, DAG); -+ case Intrinsic::loongarch_lsx_vbitrevi_w: -+ return lowerVectorBitRevImm<5>(N, DAG); -+ case Intrinsic::loongarch_lsx_vbitrevi_d: -+ return lowerVectorBitRevImm<6>(N, DAG); -+ case Intrinsic::loongarch_lsx_vfadd_s: -+ case Intrinsic::loongarch_lsx_vfadd_d: -+ return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vfsub_s: -+ case Intrinsic::loongarch_lsx_vfsub_d: -+ return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vfmul_s: -+ case Intrinsic::loongarch_lsx_vfmul_d: -+ return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vfdiv_s: -+ case Intrinsic::loongarch_lsx_vfdiv_d: -+ return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vfmadd_s: -+ case Intrinsic::loongarch_lsx_vfmadd_d: -+ return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2), N->getOperand(3)); -+ case Intrinsic::loongarch_lsx_vinsgr2vr_b: -+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), -+ N->getOperand(1), N->getOperand(2), -+ legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget)); -+ case Intrinsic::loongarch_lsx_vinsgr2vr_h: -+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), -+ N->getOperand(1), N->getOperand(2), -+ legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget)); -+ case Intrinsic::loongarch_lsx_vinsgr2vr_w: -+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), -+ N->getOperand(1), N->getOperand(2), -+ legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget)); -+ case Intrinsic::loongarch_lsx_vinsgr2vr_d: -+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), -+ N->getOperand(1), N->getOperand(2), -+ 
legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget)); -+ case Intrinsic::loongarch_lsx_vreplgr2vr_b: -+ case Intrinsic::loongarch_lsx_vreplgr2vr_h: -+ case Intrinsic::loongarch_lsx_vreplgr2vr_w: -+ case Intrinsic::loongarch_lsx_vreplgr2vr_d: { -+ EVT ResTy = N->getValueType(0); -+ SmallVector Ops(ResTy.getVectorNumElements(), N->getOperand(1)); -+ return DAG.getBuildVector(ResTy, DL, Ops); -+ } -+ case Intrinsic::loongarch_lsx_vreplve_b: -+ case Intrinsic::loongarch_lsx_vreplve_h: -+ case Intrinsic::loongarch_lsx_vreplve_w: -+ case Intrinsic::loongarch_lsx_vreplve_d: -+ return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0), -+ N->getOperand(1), -+ DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(), -+ N->getOperand(2))); -+ } -+ return SDValue(); -+} -+ - SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, - DAGCombinerInfo &DCI) const { - SelectionDAG &DAG = DCI.DAG; -@@ -1699,6 +2442,8 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, - return performSRLCombine(N, DAG, DCI, Subtarget); - case LoongArchISD::BITREV_W: - return performBITREV_WCombine(N, DAG, DCI, Subtarget); -+ case ISD::INTRINSIC_WO_CHAIN: -+ return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget); - } - return SDValue(); - } -@@ -1752,6 +2497,101 @@ static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI, - return SinkMBB; - } - -+static MachineBasicBlock * -+emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, -+ const LoongArchSubtarget &Subtarget) { -+ unsigned CondOpc; -+ switch (MI.getOpcode()) { -+ default: -+ llvm_unreachable("Unexpected opcode"); -+ case LoongArch::PseudoVBZ: -+ CondOpc = LoongArch::VSETEQZ_V; -+ break; -+ case LoongArch::PseudoVBZ_B: -+ CondOpc = LoongArch::VSETANYEQZ_B; -+ break; -+ case LoongArch::PseudoVBZ_H: -+ CondOpc = LoongArch::VSETANYEQZ_H; -+ break; -+ case LoongArch::PseudoVBZ_W: -+ CondOpc = LoongArch::VSETANYEQZ_W; -+ break; -+ case LoongArch::PseudoVBZ_D: -+ CondOpc = LoongArch::VSETANYEQZ_D; -+ break; -+ case LoongArch::PseudoVBNZ: -+ CondOpc = LoongArch::VSETNEZ_V; -+ break; -+ case LoongArch::PseudoVBNZ_B: -+ CondOpc = LoongArch::VSETALLNEZ_B; -+ break; -+ case LoongArch::PseudoVBNZ_H: -+ CondOpc = LoongArch::VSETALLNEZ_H; -+ break; -+ case LoongArch::PseudoVBNZ_W: -+ CondOpc = LoongArch::VSETALLNEZ_W; -+ break; -+ case LoongArch::PseudoVBNZ_D: -+ CondOpc = LoongArch::VSETALLNEZ_D; -+ break; -+ } -+ -+ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); -+ const BasicBlock *LLVM_BB = BB->getBasicBlock(); -+ DebugLoc DL = MI.getDebugLoc(); -+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); -+ MachineFunction::iterator It = ++BB->getIterator(); -+ -+ MachineFunction *F = BB->getParent(); -+ MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB); -+ MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB); -+ MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB); -+ -+ F->insert(It, FalseBB); -+ F->insert(It, TrueBB); -+ F->insert(It, SinkBB); -+ -+ // Transfer the remainder of MBB and its successor edges to Sink. -+ SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end()); -+ SinkBB->transferSuccessorsAndUpdatePHIs(BB); -+ -+ // Insert the real instruction to BB. -+ Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass); -+ BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg()); -+ -+ // Insert branch. 
-+ BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB); -+ BB->addSuccessor(FalseBB); -+ BB->addSuccessor(TrueBB); -+ -+ // FalseBB. -+ Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass); -+ BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1) -+ .addReg(LoongArch::R0) -+ .addImm(0); -+ BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB); -+ FalseBB->addSuccessor(SinkBB); -+ -+ // TrueBB. -+ Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass); -+ BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2) -+ .addReg(LoongArch::R0) -+ .addImm(1); -+ TrueBB->addSuccessor(SinkBB); -+ -+ // SinkBB: merge the results. -+ BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI), -+ MI.getOperand(0).getReg()) -+ .addReg(RD1) -+ .addMBB(FalseBB) -+ .addReg(RD2) -+ .addMBB(TrueBB); -+ -+ // The pseudo instruction is gone now. -+ MI.eraseFromParent(); -+ return SinkBB; -+} -+ - MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( - MachineInstr &MI, MachineBasicBlock *BB) const { - const TargetInstrInfo *TII = Subtarget.getInstrInfo(); -@@ -1786,6 +2626,17 @@ MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( - MI.eraseFromParent(); - return BB; - } -+ case LoongArch::PseudoVBZ: -+ case LoongArch::PseudoVBZ_B: -+ case LoongArch::PseudoVBZ_H: -+ case LoongArch::PseudoVBZ_W: -+ case LoongArch::PseudoVBZ_D: -+ case LoongArch::PseudoVBNZ: -+ case LoongArch::PseudoVBNZ_B: -+ case LoongArch::PseudoVBNZ_H: -+ case LoongArch::PseudoVBNZ_W: -+ case LoongArch::PseudoVBNZ_D: -+ return emitVecCondBranchPseudo(MI, BB, Subtarget); - } - } - -@@ -1858,6 +2709,13 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { - NODE_NAME_CASE(MOVFCSR2GR) - NODE_NAME_CASE(CACOP_D) - NODE_NAME_CASE(CACOP_W) -+ NODE_NAME_CASE(VPICK_SEXT_ELT) -+ NODE_NAME_CASE(VPICK_ZEXT_ELT) -+ NODE_NAME_CASE(VREPLVE) -+ NODE_NAME_CASE(VALL_ZERO) -+ NODE_NAME_CASE(VANY_ZERO) -+ NODE_NAME_CASE(VALL_NONZERO) -+ NODE_NAME_CASE(VANY_NONZERO) - } - #undef NODE_NAME_CASE - return nullptr; -@@ -1884,6 +2742,10 @@ const MCPhysReg ArgFPR64s[] = { - LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64, - LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64}; - -+const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2, -+ LoongArch::VR3, LoongArch::VR4, LoongArch::VR5, -+ LoongArch::VR6, LoongArch::VR7}; -+ - // Pass a 2*GRLen argument that has been split into two GRLen values through - // registers or the stack as necessary. 
- static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, -@@ -2030,6 +2892,8 @@ static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, - Reg = State.AllocateReg(ArgFPR32s); - else if (ValVT == MVT::f64 && !UseGPRForFloat) - Reg = State.AllocateReg(ArgFPR64s); -+ else if (ValVT.is128BitVector()) -+ Reg = State.AllocateReg(ArgVRs); - else - Reg = State.AllocateReg(ArgGPRs); - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -index 500407493fe5..7765057ebffb 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -@@ -110,6 +110,20 @@ enum NodeType : unsigned { - - // Read CPU configuration information operation - CPUCFG, -+ -+ // Vector Shuffle -+ VREPLVE, -+ -+ // Extended vector element extraction -+ VPICK_SEXT_ELT, -+ VPICK_ZEXT_ELT, -+ -+ // Vector comparisons -+ VALL_ZERO, -+ VANY_ZERO, -+ VALL_NONZERO, -+ VANY_NONZERO, -+ - // Intrinsic operations end ============================================= - }; - } // end namespace LoongArchISD -diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp -index ef79b8a0dcd3..a5d66ebac96a 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp -@@ -47,6 +47,14 @@ void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB, - return; - } - -+ // VR->VR copies. -+ if (LoongArch::LSX128RegClass.contains(DstReg, SrcReg)) { -+ BuildMI(MBB, MBBI, DL, get(LoongArch::VORI_B), DstReg) -+ .addReg(SrcReg, getKillRegState(KillSrc)) -+ .addImm(0); -+ return; -+ } -+ - // GPR->CFR copy. - if (LoongArch::CFRRegClass.contains(DstReg) && - LoongArch::GPRRegClass.contains(SrcReg)) { -@@ -99,6 +107,8 @@ void LoongArchInstrInfo::storeRegToStackSlot( - Opcode = LoongArch::FST_S; - else if (LoongArch::FPR64RegClass.hasSubClassEq(RC)) - Opcode = LoongArch::FST_D; -+ else if (LoongArch::LSX128RegClass.hasSubClassEq(RC)) -+ Opcode = LoongArch::VST; - else if (LoongArch::CFRRegClass.hasSubClassEq(RC)) - Opcode = LoongArch::PseudoST_CFR; - else -@@ -133,6 +143,8 @@ void LoongArchInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, - Opcode = LoongArch::FLD_S; - else if (LoongArch::FPR64RegClass.hasSubClassEq(RC)) - Opcode = LoongArch::FLD_D; -+ else if (LoongArch::LSX128RegClass.hasSubClassEq(RC)) -+ Opcode = LoongArch::VLD; - else if (LoongArch::CFRRegClass.hasSubClassEq(RC)) - Opcode = LoongArch::PseudoLD_CFR; - else -diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -index ac391ef471b1..b2c4bb812ba5 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -@@ -182,7 +182,7 @@ def imm32 : Operand { - let ParserMatchClass = ImmAsmOperand<"", 32, "">; - } - --def uimm1 : Operand { -+def uimm1 : Operand, ImmLeaf(Imm);}]>{ - let ParserMatchClass = UImmAsmOperand<1>; - } - -@@ -197,11 +197,11 @@ def uimm2_plus1 : Operand, - let DecoderMethod = "decodeUImmOperand<2, 1>"; - } - --def uimm3 : Operand { -+def uimm3 : Operand, ImmLeaf(Imm);}]> { - let ParserMatchClass = UImmAsmOperand<3>; - } - --def uimm4 : Operand { -+def uimm4 : Operand, ImmLeaf(Imm);}]> { - let ParserMatchClass = UImmAsmOperand<4>; - } - -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index a8ed285a37cf..13332be0bc38 100644 ---- 
a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -10,6 +10,146 @@ - // - //===----------------------------------------------------------------------===// - -+def SDT_LoongArchVreplve : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>, -+ SDTCisInt<1>, SDTCisVec<1>, -+ SDTCisSameAs<0, 1>, SDTCisInt<2>]>; -+def SDT_LoongArchVecCond : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>; -+ -+// Target nodes. -+def loongarch_vreplve : SDNode<"LoongArchISD::VREPLVE", SDT_LoongArchVreplve>; -+def loongarch_vall_nonzero : SDNode<"LoongArchISD::VALL_NONZERO", -+ SDT_LoongArchVecCond>; -+def loongarch_vany_nonzero : SDNode<"LoongArchISD::VANY_NONZERO", -+ SDT_LoongArchVecCond>; -+def loongarch_vall_zero : SDNode<"LoongArchISD::VALL_ZERO", -+ SDT_LoongArchVecCond>; -+def loongarch_vany_zero : SDNode<"LoongArchISD::VANY_ZERO", -+ SDT_LoongArchVecCond>; -+ -+def loongarch_vpick_sext_elt : SDNode<"LoongArchISD::VPICK_SEXT_ELT", -+ SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>>; -+def loongarch_vpick_zext_elt : SDNode<"LoongArchISD::VPICK_ZEXT_ELT", -+ SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>>; -+ -+class VecCond -+ : Pseudo<(outs GPR:$rd), (ins RC:$vj), -+ [(set GPR:$rd, (OpNode (TyNode RC:$vj)))]> { -+ let hasSideEffects = 0; -+ let mayLoad = 0; -+ let mayStore = 0; -+ let usesCustomInserter = 1; -+} -+ -+def vsplat_imm_eq_1 : PatFrags<(ops), [(build_vector), -+ (bitconvert (v4i32 (build_vector)))], [{ -+ APInt Imm; -+ EVT EltTy = N->getValueType(0).getVectorElementType(); -+ -+ if (N->getOpcode() == ISD::BITCAST) -+ N = N->getOperand(0).getNode(); -+ -+ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && -+ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1; -+}]>; -+ -+def vsplati8_imm_eq_7 : PatFrags<(ops), [(build_vector)], [{ -+ APInt Imm; -+ EVT EltTy = N->getValueType(0).getVectorElementType(); -+ -+ if (N->getOpcode() == ISD::BITCAST) -+ N = N->getOperand(0).getNode(); -+ -+ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && -+ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 7; -+}]>; -+def vsplati16_imm_eq_15 : PatFrags<(ops), [(build_vector)], [{ -+ APInt Imm; -+ EVT EltTy = N->getValueType(0).getVectorElementType(); -+ -+ if (N->getOpcode() == ISD::BITCAST) -+ N = N->getOperand(0).getNode(); -+ -+ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && -+ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 15; -+}]>; -+def vsplati32_imm_eq_31 : PatFrags<(ops), [(build_vector)], [{ -+ APInt Imm; -+ EVT EltTy = N->getValueType(0).getVectorElementType(); -+ -+ if (N->getOpcode() == ISD::BITCAST) -+ N = N->getOperand(0).getNode(); -+ -+ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && -+ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 31; -+}]>; -+def vsplati64_imm_eq_63 : PatFrags<(ops), [(build_vector), -+ (bitconvert (v4i32 (build_vector)))], [{ -+ APInt Imm; -+ EVT EltTy = N->getValueType(0).getVectorElementType(); -+ -+ if (N->getOpcode() == ISD::BITCAST) -+ N = N->getOperand(0).getNode(); -+ -+ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && -+ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 63; -+}]>; -+ -+def vsplati8imm7 : PatFrag<(ops node:$reg), -+ (and node:$reg, vsplati8_imm_eq_7)>; -+def vsplati16imm15 : PatFrag<(ops node:$reg), -+ (and node:$reg, vsplati16_imm_eq_15)>; -+def vsplati32imm31 : PatFrag<(ops node:$reg), -+ (and node:$reg, vsplati32_imm_eq_31)>; -+def vsplati64imm63 : PatFrag<(ops node:$reg), -+ (and node:$reg, vsplati64_imm_eq_63)>; -+ -+foreach N = [3, 4, 5, 6, 8] in -+ def 
SplatPat_uimm#N : ComplexPattern", -+ [build_vector, bitconvert], [], 2>; -+ -+foreach N = [5] in -+ def SplatPat_simm#N : ComplexPattern", -+ [build_vector, bitconvert]>; -+ -+def vsplat_uimm_inv_pow2 : ComplexPattern; -+ -+def vsplat_uimm_pow2 : ComplexPattern; -+ -+def muladd : PatFrag<(ops node:$vd, node:$vj, node:$vk), -+ (add node:$vd, (mul node:$vj, node:$vk))>; -+ -+def mulsub : PatFrag<(ops node:$vd, node:$vj, node:$vk), -+ (sub node:$vd, (mul node:$vj, node:$vk))>; -+ -+def lsxsplati8 : PatFrag<(ops node:$e0), -+ (v16i8 (build_vector node:$e0, node:$e0, -+ node:$e0, node:$e0, -+ node:$e0, node:$e0, -+ node:$e0, node:$e0, -+ node:$e0, node:$e0, -+ node:$e0, node:$e0, -+ node:$e0, node:$e0, -+ node:$e0, node:$e0))>; -+def lsxsplati16 : PatFrag<(ops node:$e0), -+ (v8i16 (build_vector node:$e0, node:$e0, -+ node:$e0, node:$e0, -+ node:$e0, node:$e0, -+ node:$e0, node:$e0))>; -+def lsxsplati32 : PatFrag<(ops node:$e0), -+ (v4i32 (build_vector node:$e0, node:$e0, -+ node:$e0, node:$e0))>; -+ -+def lsxsplati64 : PatFrag<(ops node:$e0), -+ (v2i64 (build_vector node:$e0, node:$e0))>; -+ -+def to_valide_timm : SDNodeXForm(N); -+ return CurDAG->getTargetConstant(CN->getSExtValue(), SDLoc(N), Subtarget->getGRLenVT()); -+}]>; -+ - //===----------------------------------------------------------------------===// - // Instruction class templates - //===----------------------------------------------------------------------===// -@@ -1004,4 +1144,680 @@ def PseudoVREPLI_D : Pseudo<(outs LSX128:$vd), (ins simm10:$imm), [], - "vrepli.d", "$vd, $imm">; - } - -+def PseudoVBNZ_B : VecCond; -+def PseudoVBNZ_H : VecCond; -+def PseudoVBNZ_W : VecCond; -+def PseudoVBNZ_D : VecCond; -+def PseudoVBNZ : VecCond; -+ -+def PseudoVBZ_B : VecCond; -+def PseudoVBZ_H : VecCond; -+def PseudoVBZ_W : VecCond; -+def PseudoVBZ_D : VecCond; -+def PseudoVBZ : VecCond; -+ -+} // Predicates = [HasExtLSX] -+ -+multiclass PatVr { -+ def : Pat<(v16i8 (OpNode (v16i8 LSX128:$vj))), -+ (!cast(Inst#"_B") LSX128:$vj)>; -+ def : Pat<(v8i16 (OpNode (v8i16 LSX128:$vj))), -+ (!cast(Inst#"_H") LSX128:$vj)>; -+ def : Pat<(v4i32 (OpNode (v4i32 LSX128:$vj))), -+ (!cast(Inst#"_W") LSX128:$vj)>; -+ def : Pat<(v2i64 (OpNode (v2i64 LSX128:$vj))), -+ (!cast(Inst#"_D") LSX128:$vj)>; -+} -+ -+multiclass PatVrVr { -+ def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), -+ (!cast(Inst#"_B") LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), -+ (!cast(Inst#"_H") LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)), -+ (!cast(Inst#"_W") LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)), -+ (!cast(Inst#"_D") LSX128:$vj, LSX128:$vk)>; -+} -+ -+multiclass PatVrVrF { -+ def : Pat<(OpNode (v4f32 LSX128:$vj), (v4f32 LSX128:$vk)), -+ (!cast(Inst#"_S") LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(OpNode (v2f64 LSX128:$vj), (v2f64 LSX128:$vk)), -+ (!cast(Inst#"_D") LSX128:$vj, LSX128:$vk)>; -+} -+ -+multiclass PatVrVrU { -+ def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), -+ (!cast(Inst#"_BU") LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), -+ (!cast(Inst#"_HU") LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)), -+ (!cast(Inst#"_WU") LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)), -+ (!cast(Inst#"_DU") LSX128:$vj, LSX128:$vk)>; -+} -+ -+multiclass PatVrSimm5 { -+ def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 (SplatPat_simm5 
simm5:$imm))), -+ (!cast(Inst#"_B") LSX128:$vj, simm5:$imm)>; -+ def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 (SplatPat_simm5 simm5:$imm))), -+ (!cast(Inst#"_H") LSX128:$vj, simm5:$imm)>; -+ def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 (SplatPat_simm5 simm5:$imm))), -+ (!cast(Inst#"_W") LSX128:$vj, simm5:$imm)>; -+ def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 (SplatPat_simm5 simm5:$imm))), -+ (!cast(Inst#"_D") LSX128:$vj, simm5:$imm)>; -+} -+ -+multiclass PatVrUimm5 { -+ def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm5 uimm5:$imm))), -+ (!cast(Inst#"_BU") LSX128:$vj, uimm5:$imm)>; -+ def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 (SplatPat_uimm5 uimm5:$imm))), -+ (!cast(Inst#"_HU") LSX128:$vj, uimm5:$imm)>; -+ def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 (SplatPat_uimm5 uimm5:$imm))), -+ (!cast(Inst#"_WU") LSX128:$vj, uimm5:$imm)>; -+ def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 (SplatPat_uimm5 uimm5:$imm))), -+ (!cast(Inst#"_DU") LSX128:$vj, uimm5:$imm)>; -+} -+ -+multiclass PatVrVrVr { -+ def : Pat<(OpNode (v16i8 LSX128:$vd), (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), -+ (!cast(Inst#"_B") LSX128:$vd, LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(OpNode (v8i16 LSX128:$vd), (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), -+ (!cast(Inst#"_H") LSX128:$vd, LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(OpNode (v4i32 LSX128:$vd), (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)), -+ (!cast(Inst#"_W") LSX128:$vd, LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(OpNode (v2i64 LSX128:$vd), (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)), -+ (!cast(Inst#"_D") LSX128:$vd, LSX128:$vj, LSX128:$vk)>; -+} -+ -+multiclass PatShiftVrVr { -+ def : Pat<(OpNode (v16i8 LSX128:$vj), (and vsplati8_imm_eq_7, -+ (v16i8 LSX128:$vk))), -+ (!cast(Inst#"_B") LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(OpNode (v8i16 LSX128:$vj), (and vsplati16_imm_eq_15, -+ (v8i16 LSX128:$vk))), -+ (!cast(Inst#"_H") LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(OpNode (v4i32 LSX128:$vj), (and vsplati32_imm_eq_31, -+ (v4i32 LSX128:$vk))), -+ (!cast(Inst#"_W") LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(OpNode (v2i64 LSX128:$vj), (and vsplati64_imm_eq_63, -+ (v2i64 LSX128:$vk))), -+ (!cast(Inst#"_D") LSX128:$vj, LSX128:$vk)>; -+} -+ -+multiclass PatShiftVrUimm { -+ def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm3 uimm3:$imm))), -+ (!cast(Inst#"_B") LSX128:$vj, uimm3:$imm)>; -+ def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 (SplatPat_uimm4 uimm4:$imm))), -+ (!cast(Inst#"_H") LSX128:$vj, uimm4:$imm)>; -+ def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 (SplatPat_uimm5 uimm5:$imm))), -+ (!cast(Inst#"_W") LSX128:$vj, uimm5:$imm)>; -+ def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 (SplatPat_uimm6 uimm6:$imm))), -+ (!cast(Inst#"_D") LSX128:$vj, uimm6:$imm)>; -+} -+ -+class PatVrVrB -+ : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), -+ (Inst LSX128:$vj, LSX128:$vk)>; -+ -+let Predicates = [HasExtLSX] in { -+ -+// VADD_{B/H/W/D} -+defm : PatVrVr; -+// VSUB_{B/H/W/D} -+defm : PatVrVr; -+ -+// VADDI_{B/H/W/D}U -+defm : PatVrUimm5; -+// VSUBI_{B/H/W/D}U -+defm : PatVrUimm5; -+ -+// VNEG_{B/H/W/D} -+def : Pat<(sub immAllZerosV, (v16i8 LSX128:$vj)), (VNEG_B LSX128:$vj)>; -+def : Pat<(sub immAllZerosV, (v8i16 LSX128:$vj)), (VNEG_H LSX128:$vj)>; -+def : Pat<(sub immAllZerosV, (v4i32 LSX128:$vj)), (VNEG_W LSX128:$vj)>; -+def : Pat<(sub immAllZerosV, (v2i64 LSX128:$vj)), (VNEG_D LSX128:$vj)>; -+ -+// VMAX[I]_{B/H/W/D}[U] -+defm : PatVrVr; -+defm : PatVrVrU; -+defm : PatVrSimm5; -+defm : PatVrUimm5; -+ -+// VMIN[I]_{B/H/W/D}[U] -+defm : PatVrVr; -+defm : PatVrVrU; -+defm : PatVrSimm5; -+defm : 
PatVrUimm5; -+ -+// VMUL_{B/H/W/D} -+defm : PatVrVr; -+ -+// VMADD_{B/H/W/D} -+defm : PatVrVrVr; -+// VMSUB_{B/H/W/D} -+defm : PatVrVrVr; -+ -+// VDIV_{B/H/W/D}[U] -+defm : PatVrVr; -+defm : PatVrVrU; -+ -+// VMOD_{B/H/W/D}[U] -+defm : PatVrVr; -+defm : PatVrVrU; -+ -+// VAND_V -+def : PatVrVrB; -+// VNOR_V -+def : PatVrVrB; -+// VXOR_V -+def : PatVrVrB; -+// VNOR_V -+def : Pat<(vnot (or (v16i8 LSX128:$vj), (v16i8 LSX128:$vk))), -+ (VNOR_V LSX128:$vj, LSX128:$vk)>; -+ -+// VANDI_B -+def : Pat<(and (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm8 uimm8:$imm))), -+ (VANDI_B LSX128:$vj, uimm8:$imm)>; -+// VORI_B -+def : Pat<(or (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm8 uimm8:$imm))), -+ (VORI_B LSX128:$vj, uimm8:$imm)>; -+ -+// VXORI_B -+def : Pat<(xor (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm8 uimm8:$imm))), -+ (VXORI_B LSX128:$vj, uimm8:$imm)>; -+ -+// VSLL[I]_{B/H/W/D} -+defm : PatVrVr; -+defm : PatShiftVrVr; -+defm : PatShiftVrUimm; -+ -+// VSRL[I]_{B/H/W/D} -+defm : PatVrVr; -+defm : PatShiftVrVr; -+defm : PatShiftVrUimm; -+ -+// VSRA[I]_{B/H/W/D} -+defm : PatVrVr; -+defm : PatShiftVrVr; -+defm : PatShiftVrUimm; -+ -+// VPCNT_{B/H/W/D} -+defm : PatVr; -+ -+// VBITCLR_{B/H/W/D} -+def : Pat<(and v16i8:$vj, (vnot (shl vsplat_imm_eq_1, v16i8:$vk))), -+ (v16i8 (VBITCLR_B v16i8:$vj, v16i8:$vk))>; -+def : Pat<(and v8i16:$vj, (vnot (shl vsplat_imm_eq_1, v8i16:$vk))), -+ (v8i16 (VBITCLR_H v8i16:$vj, v8i16:$vk))>; -+def : Pat<(and v4i32:$vj, (vnot (shl vsplat_imm_eq_1, v4i32:$vk))), -+ (v4i32 (VBITCLR_W v4i32:$vj, v4i32:$vk))>; -+def : Pat<(and v2i64:$vj, (vnot (shl vsplat_imm_eq_1, v2i64:$vk))), -+ (v2i64 (VBITCLR_D v2i64:$vj, v2i64:$vk))>; -+def : Pat<(and v16i8:$vj, (vnot (shl vsplat_imm_eq_1, -+ (vsplati8imm7 v16i8:$vk)))), -+ (v16i8 (VBITCLR_B v16i8:$vj, v16i8:$vk))>; -+def : Pat<(and v8i16:$vj, (vnot (shl vsplat_imm_eq_1, -+ (vsplati16imm15 v8i16:$vk)))), -+ (v8i16 (VBITCLR_H v8i16:$vj, v8i16:$vk))>; -+def : Pat<(and v4i32:$vj, (vnot (shl vsplat_imm_eq_1, -+ (vsplati32imm31 v4i32:$vk)))), -+ (v4i32 (VBITCLR_W v4i32:$vj, v4i32:$vk))>; -+def : Pat<(and v2i64:$vj, (vnot (shl vsplat_imm_eq_1, -+ (vsplati64imm63 v2i64:$vk)))), -+ (v2i64 (VBITCLR_D v2i64:$vj, v2i64:$vk))>; -+ -+// VBITCLRI_{B/H/W/D} -+def : Pat<(and (v16i8 LSX128:$vj), (v16i8 (vsplat_uimm_inv_pow2 uimm3:$imm))), -+ (VBITCLRI_B LSX128:$vj, uimm3:$imm)>; -+def : Pat<(and (v8i16 LSX128:$vj), (v8i16 (vsplat_uimm_inv_pow2 uimm4:$imm))), -+ (VBITCLRI_H LSX128:$vj, uimm4:$imm)>; -+def : Pat<(and (v4i32 LSX128:$vj), (v4i32 (vsplat_uimm_inv_pow2 uimm5:$imm))), -+ (VBITCLRI_W LSX128:$vj, uimm5:$imm)>; -+def : Pat<(and (v2i64 LSX128:$vj), (v2i64 (vsplat_uimm_inv_pow2 uimm6:$imm))), -+ (VBITCLRI_D LSX128:$vj, uimm6:$imm)>; -+ -+// VBITSET_{B/H/W/D} -+def : Pat<(or v16i8:$vj, (shl vsplat_imm_eq_1, v16i8:$vk)), -+ (v16i8 (VBITSET_B v16i8:$vj, v16i8:$vk))>; -+def : Pat<(or v8i16:$vj, (shl vsplat_imm_eq_1, v8i16:$vk)), -+ (v8i16 (VBITSET_H v8i16:$vj, v8i16:$vk))>; -+def : Pat<(or v4i32:$vj, (shl vsplat_imm_eq_1, v4i32:$vk)), -+ (v4i32 (VBITSET_W v4i32:$vj, v4i32:$vk))>; -+def : Pat<(or v2i64:$vj, (shl vsplat_imm_eq_1, v2i64:$vk)), -+ (v2i64 (VBITSET_D v2i64:$vj, v2i64:$vk))>; -+def : Pat<(or v16i8:$vj, (shl vsplat_imm_eq_1, (vsplati8imm7 v16i8:$vk))), -+ (v16i8 (VBITSET_B v16i8:$vj, v16i8:$vk))>; -+def : Pat<(or v8i16:$vj, (shl vsplat_imm_eq_1, (vsplati16imm15 v8i16:$vk))), -+ (v8i16 (VBITSET_H v8i16:$vj, v8i16:$vk))>; -+def : Pat<(or v4i32:$vj, (shl vsplat_imm_eq_1, (vsplati32imm31 v4i32:$vk))), -+ (v4i32 (VBITSET_W v4i32:$vj, v4i32:$vk))>; -+def : 
Pat<(or v2i64:$vj, (shl vsplat_imm_eq_1, (vsplati64imm63 v2i64:$vk))), -+ (v2i64 (VBITSET_D v2i64:$vj, v2i64:$vk))>; -+ -+// VBITSETI_{B/H/W/D} -+def : Pat<(or (v16i8 LSX128:$vj), (v16i8 (vsplat_uimm_pow2 uimm3:$imm))), -+ (VBITSETI_B LSX128:$vj, uimm3:$imm)>; -+def : Pat<(or (v8i16 LSX128:$vj), (v8i16 (vsplat_uimm_pow2 uimm4:$imm))), -+ (VBITSETI_H LSX128:$vj, uimm4:$imm)>; -+def : Pat<(or (v4i32 LSX128:$vj), (v4i32 (vsplat_uimm_pow2 uimm5:$imm))), -+ (VBITSETI_W LSX128:$vj, uimm5:$imm)>; -+def : Pat<(or (v2i64 LSX128:$vj), (v2i64 (vsplat_uimm_pow2 uimm6:$imm))), -+ (VBITSETI_D LSX128:$vj, uimm6:$imm)>; -+ -+// VBITREV_{B/H/W/D} -+def : Pat<(xor v16i8:$vj, (shl vsplat_imm_eq_1, v16i8:$vk)), -+ (v16i8 (VBITREV_B v16i8:$vj, v16i8:$vk))>; -+def : Pat<(xor v8i16:$vj, (shl vsplat_imm_eq_1, v8i16:$vk)), -+ (v8i16 (VBITREV_H v8i16:$vj, v8i16:$vk))>; -+def : Pat<(xor v4i32:$vj, (shl vsplat_imm_eq_1, v4i32:$vk)), -+ (v4i32 (VBITREV_W v4i32:$vj, v4i32:$vk))>; -+def : Pat<(xor v2i64:$vj, (shl vsplat_imm_eq_1, v2i64:$vk)), -+ (v2i64 (VBITREV_D v2i64:$vj, v2i64:$vk))>; -+def : Pat<(xor v16i8:$vj, (shl vsplat_imm_eq_1, (vsplati8imm7 v16i8:$vk))), -+ (v16i8 (VBITREV_B v16i8:$vj, v16i8:$vk))>; -+def : Pat<(xor v8i16:$vj, (shl vsplat_imm_eq_1, (vsplati16imm15 v8i16:$vk))), -+ (v8i16 (VBITREV_H v8i16:$vj, v8i16:$vk))>; -+def : Pat<(xor v4i32:$vj, (shl vsplat_imm_eq_1, (vsplati32imm31 v4i32:$vk))), -+ (v4i32 (VBITREV_W v4i32:$vj, v4i32:$vk))>; -+def : Pat<(xor v2i64:$vj, (shl vsplat_imm_eq_1, (vsplati64imm63 v2i64:$vk))), -+ (v2i64 (VBITREV_D v2i64:$vj, v2i64:$vk))>; -+ -+// VBITREVI_{B/H/W/D} -+def : Pat<(xor (v16i8 LSX128:$vj), (v16i8 (vsplat_uimm_pow2 uimm3:$imm))), -+ (VBITREVI_B LSX128:$vj, uimm3:$imm)>; -+def : Pat<(xor (v8i16 LSX128:$vj), (v8i16 (vsplat_uimm_pow2 uimm4:$imm))), -+ (VBITREVI_H LSX128:$vj, uimm4:$imm)>; -+def : Pat<(xor (v4i32 LSX128:$vj), (v4i32 (vsplat_uimm_pow2 uimm5:$imm))), -+ (VBITREVI_W LSX128:$vj, uimm5:$imm)>; -+def : Pat<(xor (v2i64 LSX128:$vj), (v2i64 (vsplat_uimm_pow2 uimm6:$imm))), -+ (VBITREVI_D LSX128:$vj, uimm6:$imm)>; -+ -+// VFADD_{S/D} -+defm : PatVrVrF; -+ -+// VFSUB_{S/D} -+defm : PatVrVrF; -+ -+// VFMUL_{S/D} -+defm : PatVrVrF; -+ -+// VFDIV_{S/D} -+defm : PatVrVrF; -+ -+// VFMADD_{S/D} -+def : Pat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va), -+ (VFMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; -+def : Pat<(fma v2f64:$vj, v2f64:$vk, v2f64:$va), -+ (VFMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; -+ -+// VINSGR2VR_{B/H/W/D} -+def : Pat<(vector_insert v16i8:$vd, GRLenVT:$rj, uimm4:$imm), -+ (VINSGR2VR_B v16i8:$vd, GRLenVT:$rj, uimm4:$imm)>; -+def : Pat<(vector_insert v8i16:$vd, GRLenVT:$rj, uimm3:$imm), -+ (VINSGR2VR_H v8i16:$vd, GRLenVT:$rj, uimm3:$imm)>; -+def : Pat<(vector_insert v4i32:$vd, GRLenVT:$rj, uimm2:$imm), -+ (VINSGR2VR_W v4i32:$vd, GRLenVT:$rj, uimm2:$imm)>; -+def : Pat<(vector_insert v2i64:$vd, GRLenVT:$rj, uimm1:$imm), -+ (VINSGR2VR_D v2i64:$vd, GRLenVT:$rj, uimm1:$imm)>; -+ -+// VPICKVE2GR_{B/H/W}[U] -+def : Pat<(loongarch_vpick_sext_elt v16i8:$vd, uimm4:$imm, i8), -+ (VPICKVE2GR_B v16i8:$vd, uimm4:$imm)>; -+def : Pat<(loongarch_vpick_sext_elt v8i16:$vd, uimm3:$imm, i16), -+ (VPICKVE2GR_H v8i16:$vd, uimm3:$imm)>; -+def : Pat<(loongarch_vpick_sext_elt v4i32:$vd, uimm2:$imm, i32), -+ (VPICKVE2GR_W v4i32:$vd, uimm2:$imm)>; -+ -+def : Pat<(loongarch_vpick_zext_elt v16i8:$vd, uimm4:$imm, i8), -+ (VPICKVE2GR_BU v16i8:$vd, uimm4:$imm)>; -+def : Pat<(loongarch_vpick_zext_elt v8i16:$vd, uimm3:$imm, i16), -+ (VPICKVE2GR_HU v8i16:$vd, uimm3:$imm)>; -+def : 
Pat<(loongarch_vpick_zext_elt v4i32:$vd, uimm2:$imm, i32), -+ (VPICKVE2GR_WU v4i32:$vd, uimm2:$imm)>; -+ -+// VREPLGR2VR_{B/H/W/D} -+def : Pat<(lsxsplati8 GPR:$rj), (VREPLGR2VR_B GPR:$rj)>; -+def : Pat<(lsxsplati16 GPR:$rj), (VREPLGR2VR_H GPR:$rj)>; -+def : Pat<(lsxsplati32 GPR:$rj), (VREPLGR2VR_W GPR:$rj)>; -+def : Pat<(lsxsplati64 GPR:$rj), (VREPLGR2VR_D GPR:$rj)>; -+ -+// VREPLVE_{B/H/W/D} -+def : Pat<(loongarch_vreplve v16i8:$vj, GRLenVT:$rk), -+ (VREPLVE_B v16i8:$vj, GRLenVT:$rk)>; -+def : Pat<(loongarch_vreplve v8i16:$vj, GRLenVT:$rk), -+ (VREPLVE_H v8i16:$vj, GRLenVT:$rk)>; -+def : Pat<(loongarch_vreplve v4i32:$vj, GRLenVT:$rk), -+ (VREPLVE_W v4i32:$vj, GRLenVT:$rk)>; -+def : Pat<(loongarch_vreplve v2i64:$vj, GRLenVT:$rk), -+ (VREPLVE_D v2i64:$vj, GRLenVT:$rk)>; -+ -+// Loads/Stores -+foreach vt = [v16i8, v8i16, v4i32, v2i64] in { -+ defm : LdPat; -+ def : RegRegLdPat; -+ defm : StPat; -+ def : RegRegStPat; -+} -+ -+} // Predicates = [HasExtLSX] -+ -+/// Intrinsic pattern -+ -+class deriveLSXIntrinsic { -+ Intrinsic ret = !cast(!tolower("int_loongarch_lsx_"#Inst)); -+} -+ -+let Predicates = [HasExtLSX] in { -+ -+// vty: v16i8/v8i16/v4i32/v2i64 -+// Pat<(Intrinsic vty:$vj, vty:$vk), -+// (LAInst vty:$vj, vty:$vk)>; -+foreach Inst = ["VSADD_B", "VSADD_BU", "VSSUB_B", "VSSUB_BU", -+ "VHADDW_H_B", "VHADDW_HU_BU", "VHSUBW_H_B", "VHSUBW_HU_BU", -+ "VADDWEV_H_B", "VADDWOD_H_B", "VSUBWEV_H_B", "VSUBWOD_H_B", -+ "VADDWEV_H_BU", "VADDWOD_H_BU", "VSUBWEV_H_BU", "VSUBWOD_H_BU", -+ "VADDWEV_H_BU_B", "VADDWOD_H_BU_B", -+ "VAVG_B", "VAVG_BU", "VAVGR_B", "VAVGR_BU", -+ "VABSD_B", "VABSD_BU", "VADDA_B", "VMUH_B", "VMUH_BU", -+ "VMULWEV_H_B", "VMULWOD_H_B", "VMULWEV_H_BU", "VMULWOD_H_BU", -+ "VMULWEV_H_BU_B", "VMULWOD_H_BU_B", "VSIGNCOV_B", -+ "VANDN_V", "VORN_V", "VROTR_B", "VSRLR_B", "VSRAR_B", -+ "VSEQ_B", "VSLE_B", "VSLE_BU", "VSLT_B", "VSLT_BU", -+ "VPACKEV_B", "VPACKOD_B", "VPICKEV_B", "VPICKOD_B", -+ "VILVL_B", "VILVH_B"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), -+ (!cast(Inst) LSX128:$vj, LSX128:$vk)>; -+foreach Inst = ["VSADD_H", "VSADD_HU", "VSSUB_H", "VSSUB_HU", -+ "VHADDW_W_H", "VHADDW_WU_HU", "VHSUBW_W_H", "VHSUBW_WU_HU", -+ "VADDWEV_W_H", "VADDWOD_W_H", "VSUBWEV_W_H", "VSUBWOD_W_H", -+ "VADDWEV_W_HU", "VADDWOD_W_HU", "VSUBWEV_W_HU", "VSUBWOD_W_HU", -+ "VADDWEV_W_HU_H", "VADDWOD_W_HU_H", -+ "VAVG_H", "VAVG_HU", "VAVGR_H", "VAVGR_HU", -+ "VABSD_H", "VABSD_HU", "VADDA_H", "VMUH_H", "VMUH_HU", -+ "VMULWEV_W_H", "VMULWOD_W_H", "VMULWEV_W_HU", "VMULWOD_W_HU", -+ "VMULWEV_W_HU_H", "VMULWOD_W_HU_H", "VSIGNCOV_H", "VROTR_H", -+ "VSRLR_H", "VSRAR_H", "VSRLN_B_H", "VSRAN_B_H", "VSRLRN_B_H", -+ "VSRARN_B_H", "VSSRLN_B_H", "VSSRAN_B_H", "VSSRLN_BU_H", -+ "VSSRAN_BU_H", "VSSRLRN_B_H", "VSSRARN_B_H", "VSSRLRN_BU_H", -+ "VSSRARN_BU_H", -+ "VSEQ_H", "VSLE_H", "VSLE_HU", "VSLT_H", "VSLT_HU", -+ "VPACKEV_H", "VPACKOD_H", "VPICKEV_H", "VPICKOD_H", -+ "VILVL_H", "VILVH_H"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), -+ (!cast(Inst) LSX128:$vj, LSX128:$vk)>; -+foreach Inst = ["VSADD_W", "VSADD_WU", "VSSUB_W", "VSSUB_WU", -+ "VHADDW_D_W", "VHADDW_DU_WU", "VHSUBW_D_W", "VHSUBW_DU_WU", -+ "VADDWEV_D_W", "VADDWOD_D_W", "VSUBWEV_D_W", "VSUBWOD_D_W", -+ "VADDWEV_D_WU", "VADDWOD_D_WU", "VSUBWEV_D_WU", "VSUBWOD_D_WU", -+ "VADDWEV_D_WU_W", "VADDWOD_D_WU_W", -+ "VAVG_W", "VAVG_WU", "VAVGR_W", "VAVGR_WU", -+ "VABSD_W", "VABSD_WU", "VADDA_W", "VMUH_W", "VMUH_WU", -+ "VMULWEV_D_W", "VMULWOD_D_W", "VMULWEV_D_WU", "VMULWOD_D_WU", -+ 
"VMULWEV_D_WU_W", "VMULWOD_D_WU_W", "VSIGNCOV_W", "VROTR_W", -+ "VSRLR_W", "VSRAR_W", "VSRLN_H_W", "VSRAN_H_W", "VSRLRN_H_W", -+ "VSRARN_H_W", "VSSRLN_H_W", "VSSRAN_H_W", "VSSRLN_HU_W", -+ "VSSRAN_HU_W", "VSSRLRN_H_W", "VSSRARN_H_W", "VSSRLRN_HU_W", -+ "VSSRARN_HU_W", -+ "VSEQ_W", "VSLE_W", "VSLE_WU", "VSLT_W", "VSLT_WU", -+ "VPACKEV_W", "VPACKOD_W", "VPICKEV_W", "VPICKOD_W", -+ "VILVL_W", "VILVH_W"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)), -+ (!cast(Inst) LSX128:$vj, LSX128:$vk)>; -+foreach Inst = ["VADD_Q", "VSUB_Q", -+ "VSADD_D", "VSADD_DU", "VSSUB_D", "VSSUB_DU", -+ "VHADDW_Q_D", "VHADDW_QU_DU", "VHSUBW_Q_D", "VHSUBW_QU_DU", -+ "VADDWEV_Q_D", "VADDWOD_Q_D", "VSUBWEV_Q_D", "VSUBWOD_Q_D", -+ "VADDWEV_Q_DU", "VADDWOD_Q_DU", "VSUBWEV_Q_DU", "VSUBWOD_Q_DU", -+ "VADDWEV_Q_DU_D", "VADDWOD_Q_DU_D", -+ "VAVG_D", "VAVG_DU", "VAVGR_D", "VAVGR_DU", -+ "VABSD_D", "VABSD_DU", "VADDA_D", "VMUH_D", "VMUH_DU", -+ "VMULWEV_Q_D", "VMULWOD_Q_D", "VMULWEV_Q_DU", "VMULWOD_Q_DU", -+ "VMULWEV_Q_DU_D", "VMULWOD_Q_DU_D", "VSIGNCOV_D", "VROTR_D", -+ "VSRLR_D", "VSRAR_D", "VSRLN_W_D", "VSRAN_W_D", "VSRLRN_W_D", -+ "VSRARN_W_D", "VSSRLN_W_D", "VSSRAN_W_D", "VSSRLN_WU_D", -+ "VSSRAN_WU_D", "VSSRLRN_W_D", "VSSRARN_W_D", "VSSRLRN_WU_D", -+ "VSSRARN_WU_D", "VFFINT_S_L", -+ "VSEQ_D", "VSLE_D", "VSLE_DU", "VSLT_D", "VSLT_DU", -+ "VPACKEV_D", "VPACKOD_D", "VPICKEV_D", "VPICKOD_D", -+ "VILVL_D", "VILVH_D"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)), -+ (!cast(Inst) LSX128:$vj, LSX128:$vk)>; -+ -+// vty: v16i8/v8i16/v4i32/v2i64 -+// Pat<(Intrinsic vty:$vd, vty:$vj, vty:$vk), -+// (LAInst vty:$vd, vty:$vj, vty:$vk)>; -+foreach Inst = ["VMADDWEV_H_B", "VMADDWOD_H_B", "VMADDWEV_H_BU", -+ "VMADDWOD_H_BU", "VMADDWEV_H_BU_B", "VMADDWOD_H_BU_B"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v8i16 LSX128:$vd), (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), -+ (!cast(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>; -+foreach Inst = ["VMADDWEV_W_H", "VMADDWOD_W_H", "VMADDWEV_W_HU", -+ "VMADDWOD_W_HU", "VMADDWEV_W_HU_H", "VMADDWOD_W_HU_H"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v4i32 LSX128:$vd), (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), -+ (!cast(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>; -+foreach Inst = ["VMADDWEV_D_W", "VMADDWOD_D_W", "VMADDWEV_D_WU", -+ "VMADDWOD_D_WU", "VMADDWEV_D_WU_W", "VMADDWOD_D_WU_W"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v2i64 LSX128:$vd), (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)), -+ (!cast(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>; -+foreach Inst = ["VMADDWEV_Q_D", "VMADDWOD_Q_D", "VMADDWEV_Q_DU", -+ "VMADDWOD_Q_DU", "VMADDWEV_Q_DU_D", "VMADDWOD_Q_DU_D"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v2i64 LSX128:$vd), (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)), -+ (!cast(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>; -+ -+// vty: v16i8/v8i16/v4i32/v2i64 -+// Pat<(Intrinsic vty:$vj), -+// (LAInst vty:$vj)>; -+foreach Inst = ["VEXTH_H_B", "VEXTH_HU_BU", -+ "VMSKLTZ_B", "VMSKGEZ_B", "VMSKNZ_B", -+ "VCLO_B", "VCLZ_B"] in -+ def : Pat<(deriveLSXIntrinsic.ret (v16i8 LSX128:$vj)), -+ (!cast(Inst) LSX128:$vj)>; -+foreach Inst = ["VEXTH_W_H", "VEXTH_WU_HU", "VMSKLTZ_H", -+ "VCLO_H", "VCLZ_H", "VFCVTL_S_H", "VFCVTH_S_H"] in -+ def : Pat<(deriveLSXIntrinsic.ret (v8i16 LSX128:$vj)), -+ (!cast(Inst) LSX128:$vj)>; -+foreach Inst = ["VEXTH_D_W", "VEXTH_DU_WU", "VMSKLTZ_W", -+ "VCLO_W", "VCLZ_W", "VFFINT_S_W", "VFFINT_S_WU", -+ "VFFINTL_D_W", "VFFINTH_D_W"] in -+ def : Pat<(deriveLSXIntrinsic.ret (v4i32 LSX128:$vj)), -+ (!cast(Inst) LSX128:$vj)>; 
-+foreach Inst = ["VEXTH_Q_D", "VEXTH_QU_DU", "VMSKLTZ_D", -+ "VEXTL_Q_D", "VEXTL_QU_DU", -+ "VCLO_D", "VCLZ_D", "VFFINT_D_L", "VFFINT_D_LU"] in -+ def : Pat<(deriveLSXIntrinsic.ret (v2i64 LSX128:$vj)), -+ (!cast(Inst) LSX128:$vj)>; -+ -+// Pat<(Intrinsic timm:$imm) -+// (LAInst timm:$imm)>; -+def : Pat<(int_loongarch_lsx_vldi timm:$imm), -+ (VLDI (to_valide_timm timm:$imm))>; -+foreach Inst = ["VREPLI_B", "VREPLI_H", "VREPLI_W", "VREPLI_D"] in -+ def : Pat<(deriveLSXIntrinsic.ret timm:$imm), -+ (!cast("Pseudo"#Inst) (to_valide_timm timm:$imm))>; -+ -+// vty: v16i8/v8i16/v4i32/v2i64 -+// Pat<(Intrinsic vty:$vj, timm:$imm) -+// (LAInst vty:$vj, timm:$imm)>; -+foreach Inst = ["VSAT_B", "VSAT_BU", "VNORI_B", "VROTRI_B", "VSLLWIL_H_B", -+ "VSLLWIL_HU_BU", "VSRLRI_B", "VSRARI_B", -+ "VSEQI_B", "VSLEI_B", "VSLEI_BU", "VSLTI_B", "VSLTI_BU", -+ "VREPLVEI_B", "VBSLL_V", "VBSRL_V", "VSHUF4I_B"] in -+ def : Pat<(deriveLSXIntrinsic.ret (v16i8 LSX128:$vj), timm:$imm), -+ (!cast(Inst) LSX128:$vj, (to_valide_timm timm:$imm))>; -+foreach Inst = ["VSAT_H", "VSAT_HU", "VROTRI_H", "VSLLWIL_W_H", -+ "VSLLWIL_WU_HU", "VSRLRI_H", "VSRARI_H", -+ "VSEQI_H", "VSLEI_H", "VSLEI_HU", "VSLTI_H", "VSLTI_HU", -+ "VREPLVEI_H", "VSHUF4I_H"] in -+ def : Pat<(deriveLSXIntrinsic.ret (v8i16 LSX128:$vj), timm:$imm), -+ (!cast(Inst) LSX128:$vj, (to_valide_timm timm:$imm))>; -+foreach Inst = ["VSAT_W", "VSAT_WU", "VROTRI_W", "VSLLWIL_D_W", -+ "VSLLWIL_DU_WU", "VSRLRI_W", "VSRARI_W", -+ "VSEQI_W", "VSLEI_W", "VSLEI_WU", "VSLTI_W", "VSLTI_WU", -+ "VREPLVEI_W", "VSHUF4I_W"] in -+ def : Pat<(deriveLSXIntrinsic.ret (v4i32 LSX128:$vj), timm:$imm), -+ (!cast(Inst) LSX128:$vj, (to_valide_timm timm:$imm))>; -+foreach Inst = ["VSAT_D", "VSAT_DU", "VROTRI_D", "VSRLRI_D", "VSRARI_D", -+ "VSEQI_D", "VSLEI_D", "VSLEI_DU", "VSLTI_D", "VSLTI_DU", -+ "VPICKVE2GR_D", "VPICKVE2GR_DU", -+ "VREPLVEI_D"] in -+ def : Pat<(deriveLSXIntrinsic.ret (v2i64 LSX128:$vj), timm:$imm), -+ (!cast(Inst) LSX128:$vj, (to_valide_timm timm:$imm))>; -+ -+// vty: v16i8/v8i16/v4i32/v2i64 -+// Pat<(Intrinsic vty:$vd, vty:$vj, timm:$imm) -+// (LAInst vty:$vd, vty:$vj, timm:$imm)>; -+foreach Inst = ["VSRLNI_B_H", "VSRANI_B_H", "VSRLRNI_B_H", "VSRARNI_B_H", -+ "VSSRLNI_B_H", "VSSRANI_B_H", "VSSRLNI_BU_H", "VSSRANI_BU_H", -+ "VSSRLRNI_B_H", "VSSRARNI_B_H", "VSSRLRNI_BU_H", "VSSRARNI_BU_H", -+ "VFRSTPI_B", "VBITSELI_B", "VEXTRINS_B"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v16i8 LSX128:$vd), (v16i8 LSX128:$vj), timm:$imm), -+ (!cast(Inst) LSX128:$vd, LSX128:$vj, -+ (to_valide_timm timm:$imm))>; -+foreach Inst = ["VSRLNI_H_W", "VSRANI_H_W", "VSRLRNI_H_W", "VSRARNI_H_W", -+ "VSSRLNI_H_W", "VSSRANI_H_W", "VSSRLNI_HU_W", "VSSRANI_HU_W", -+ "VSSRLRNI_H_W", "VSSRARNI_H_W", "VSSRLRNI_HU_W", "VSSRARNI_HU_W", -+ "VFRSTPI_H", "VEXTRINS_H"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v8i16 LSX128:$vd), (v8i16 LSX128:$vj), timm:$imm), -+ (!cast(Inst) LSX128:$vd, LSX128:$vj, -+ (to_valide_timm timm:$imm))>; -+foreach Inst = ["VSRLNI_W_D", "VSRANI_W_D", "VSRLRNI_W_D", "VSRARNI_W_D", -+ "VSSRLNI_W_D", "VSSRANI_W_D", "VSSRLNI_WU_D", "VSSRANI_WU_D", -+ "VSSRLRNI_W_D", "VSSRARNI_W_D", "VSSRLRNI_WU_D", "VSSRARNI_WU_D", -+ "VPERMI_W", "VEXTRINS_W"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v4i32 LSX128:$vd), (v4i32 LSX128:$vj), timm:$imm), -+ (!cast(Inst) LSX128:$vd, LSX128:$vj, -+ (to_valide_timm timm:$imm))>; -+foreach Inst = ["VSRLNI_D_Q", "VSRANI_D_Q", "VSRLRNI_D_Q", "VSRARNI_D_Q", -+ "VSSRLNI_D_Q", "VSSRANI_D_Q", "VSSRLNI_DU_Q", "VSSRANI_DU_Q", -+ "VSSRLRNI_D_Q", "VSSRARNI_D_Q", 
"VSSRLRNI_DU_Q", "VSSRARNI_DU_Q", -+ "VSHUF4I_D", "VEXTRINS_D"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v2i64 LSX128:$vd), (v2i64 LSX128:$vj), timm:$imm), -+ (!cast(Inst) LSX128:$vd, LSX128:$vj, -+ (to_valide_timm timm:$imm))>; -+ -+// vty: v16i8/v8i16/v4i32/v2i64 -+// Pat<(Intrinsic vty:$vd, vty:$vj, vty:$vk), -+// (LAInst vty:$vd, vty:$vj, vty:$vk)>; -+foreach Inst = ["VFRSTP_B", "VBITSEL_V", "VSHUF_B"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v16i8 LSX128:$vd), (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), -+ (!cast(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>; -+foreach Inst = ["VFRSTP_H", "VSHUF_H"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v8i16 LSX128:$vd), (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), -+ (!cast(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>; -+def : Pat<(int_loongarch_lsx_vshuf_w (v4i32 LSX128:$vd), (v4i32 LSX128:$vj), -+ (v4i32 LSX128:$vk)), -+ (VSHUF_W LSX128:$vd, LSX128:$vj, LSX128:$vk)>; -+def : Pat<(int_loongarch_lsx_vshuf_d (v2i64 LSX128:$vd), (v2i64 LSX128:$vj), -+ (v2i64 LSX128:$vk)), -+ (VSHUF_D LSX128:$vd, LSX128:$vj, LSX128:$vk)>; -+ -+// vty: v4f32/v2f64 -+// Pat<(Intrinsic vty:$vj, vty:$vk, vty:$va), -+// (LAInst vty:$vj, vty:$vk, vty:$va)>; -+foreach Inst = ["VFMSUB_S", "VFNMADD_S", "VFNMSUB_S"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v4f32 LSX128:$vj), (v4f32 LSX128:$vk), (v4f32 LSX128:$va)), -+ (!cast(Inst) LSX128:$vj, LSX128:$vk, LSX128:$va)>; -+foreach Inst = ["VFMSUB_D", "VFNMADD_D", "VFNMSUB_D"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v2f64 LSX128:$vj), (v2f64 LSX128:$vk), (v2f64 LSX128:$va)), -+ (!cast(Inst) LSX128:$vj, LSX128:$vk, LSX128:$va)>; -+ -+// vty: v4f32/v2f64 -+// Pat<(Intrinsic vty:$vj, vty:$vk), -+// (LAInst vty:$vj, vty:$vk)>; -+foreach Inst = ["VFMAX_S", "VFMIN_S", "VFMAXA_S", "VFMINA_S", "VFCVT_H_S", -+ "VFCMP_CAF_S", "VFCMP_CUN_S", "VFCMP_CEQ_S", "VFCMP_CUEQ_S", -+ "VFCMP_CLT_S", "VFCMP_CULT_S", "VFCMP_CLE_S", "VFCMP_CULE_S", -+ "VFCMP_CNE_S", "VFCMP_COR_S", "VFCMP_CUNE_S", -+ "VFCMP_SAF_S", "VFCMP_SUN_S", "VFCMP_SEQ_S", "VFCMP_SUEQ_S", -+ "VFCMP_SLT_S", "VFCMP_SULT_S", "VFCMP_SLE_S", "VFCMP_SULE_S", -+ "VFCMP_SNE_S", "VFCMP_SOR_S", "VFCMP_SUNE_S"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v4f32 LSX128:$vj), (v4f32 LSX128:$vk)), -+ (!cast(Inst) LSX128:$vj, LSX128:$vk)>; -+foreach Inst = ["VFMAX_D", "VFMIN_D", "VFMAXA_D", "VFMINA_D", "VFCVT_S_D", -+ "VFTINTRNE_W_D", "VFTINTRZ_W_D", "VFTINTRP_W_D", "VFTINTRM_W_D", -+ "VFTINT_W_D", -+ "VFCMP_CAF_D", "VFCMP_CUN_D", "VFCMP_CEQ_D", "VFCMP_CUEQ_D", -+ "VFCMP_CLT_D", "VFCMP_CULT_D", "VFCMP_CLE_D", "VFCMP_CULE_D", -+ "VFCMP_CNE_D", "VFCMP_COR_D", "VFCMP_CUNE_D", -+ "VFCMP_SAF_D", "VFCMP_SUN_D", "VFCMP_SEQ_D", "VFCMP_SUEQ_D", -+ "VFCMP_SLT_D", "VFCMP_SULT_D", "VFCMP_SLE_D", "VFCMP_SULE_D", -+ "VFCMP_SNE_D", "VFCMP_SOR_D", "VFCMP_SUNE_D"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v2f64 LSX128:$vj), (v2f64 LSX128:$vk)), -+ (!cast(Inst) LSX128:$vj, LSX128:$vk)>; -+ -+// vty: v4f32/v2f64 -+// Pat<(Intrinsic vty:$vj), -+// (LAInst vty:$vj)>; -+foreach Inst = ["VFLOGB_S", "VFCLASS_S", "VFSQRT_S", "VFRECIP_S", "VFRSQRT_S", -+ "VFRINT_S", "VFCVTL_D_S", "VFCVTH_D_S", -+ "VFRINTRNE_S", "VFRINTRZ_S", "VFRINTRP_S", "VFRINTRM_S", -+ "VFTINTRNE_W_S", "VFTINTRZ_W_S", "VFTINTRP_W_S", "VFTINTRM_W_S", -+ "VFTINT_W_S", "VFTINTRZ_WU_S", "VFTINT_WU_S", -+ "VFTINTRNEL_L_S", "VFTINTRNEH_L_S", "VFTINTRZL_L_S", -+ "VFTINTRZH_L_S", "VFTINTRPL_L_S", "VFTINTRPH_L_S", -+ "VFTINTRML_L_S", "VFTINTRMH_L_S", "VFTINTL_L_S", -+ "VFTINTH_L_S"] in -+ def : Pat<(deriveLSXIntrinsic.ret (v4f32 LSX128:$vj)), -+ 
(!cast(Inst) LSX128:$vj)>; -+foreach Inst = ["VFLOGB_D", "VFCLASS_D", "VFSQRT_D", "VFRECIP_D", "VFRSQRT_D", -+ "VFRINT_D", -+ "VFRINTRNE_D", "VFRINTRZ_D", "VFRINTRP_D", "VFRINTRM_D", -+ "VFTINTRNE_L_D", "VFTINTRZ_L_D", "VFTINTRP_L_D", "VFTINTRM_L_D", -+ "VFTINT_L_D", "VFTINTRZ_LU_D", "VFTINT_LU_D"] in -+ def : Pat<(deriveLSXIntrinsic.ret (v2f64 LSX128:$vj)), -+ (!cast(Inst) LSX128:$vj)>; -+ -+// load -+def : Pat<(int_loongarch_lsx_vld GPR:$rj, timm:$imm), -+ (VLD GPR:$rj, (to_valide_timm timm:$imm))>; -+def : Pat<(int_loongarch_lsx_vldx GPR:$rj, GPR:$rk), -+ (VLDX GPR:$rj, GPR:$rk)>; -+ -+def : Pat<(int_loongarch_lsx_vldrepl_b GPR:$rj, timm:$imm), -+ (VLDREPL_B GPR:$rj, (to_valide_timm timm:$imm))>; -+def : Pat<(int_loongarch_lsx_vldrepl_h GPR:$rj, timm:$imm), -+ (VLDREPL_H GPR:$rj, (to_valide_timm timm:$imm))>; -+def : Pat<(int_loongarch_lsx_vldrepl_w GPR:$rj, timm:$imm), -+ (VLDREPL_W GPR:$rj, (to_valide_timm timm:$imm))>; -+def : Pat<(int_loongarch_lsx_vldrepl_d GPR:$rj, timm:$imm), -+ (VLDREPL_D GPR:$rj, (to_valide_timm timm:$imm))>; -+ -+// store -+def : Pat<(int_loongarch_lsx_vst LSX128:$vd, GPR:$rj, timm:$imm), -+ (VST LSX128:$vd, GPR:$rj, (to_valide_timm timm:$imm))>; -+def : Pat<(int_loongarch_lsx_vstx LSX128:$vd, GPR:$rj, GPR:$rk), -+ (VSTX LSX128:$vd, GPR:$rj, GPR:$rk)>; -+ -+def : Pat<(int_loongarch_lsx_vstelm_b v16i8:$vd, GPR:$rj, timm:$imm, timm:$idx), -+ (VSTELM_B v16i8:$vd, GPR:$rj, (to_valide_timm timm:$imm), -+ (to_valide_timm timm:$idx))>; -+def : Pat<(int_loongarch_lsx_vstelm_h v8i16:$vd, GPR:$rj, timm:$imm, timm:$idx), -+ (VSTELM_H v8i16:$vd, GPR:$rj, (to_valide_timm timm:$imm), -+ (to_valide_timm timm:$idx))>; -+def : Pat<(int_loongarch_lsx_vstelm_w v4i32:$vd, GPR:$rj, timm:$imm, timm:$idx), -+ (VSTELM_W v4i32:$vd, GPR:$rj, (to_valide_timm timm:$imm), -+ (to_valide_timm timm:$idx))>; -+def : Pat<(int_loongarch_lsx_vstelm_d v2i64:$vd, GPR:$rj, timm:$imm, timm:$idx), -+ (VSTELM_D v2i64:$vd, GPR:$rj, (to_valide_timm timm:$imm), -+ (to_valide_timm timm:$idx))>; -+ - } // Predicates = [HasExtLSX] --- -2.20.1 - - -From 6f813b014a5df84162cc182994d597674d433a9a Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Sat, 19 Aug 2023 16:53:50 +0800 -Subject: [PATCH 03/35] [LoongArch] Add LASX intrinsic support - -This patch is similar to D155829. 
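As a rough illustration of what the LASX intrinsic definitions below enable: each int_loongarch_lasx_* record becomes an @llvm.loongarch.lasx.* intrinsic at the IR level, and several of them (xvadd, xvsub, xvmax, ...) are folded to generic ISD nodes by the new performINTRINSIC_WO_CHAINCombine cases. A minimal IR sketch follows; the function name xvadd_w_demo and the llc invocation are assumptions for illustration only, not taken from this patch (the series' separate LASX testcase patches carry the authoritative coverage).

    ; Build with: llc -mtriple=loongarch64 -mattr=+lasx (assumed invocation)
    declare <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32>, <8 x i32>)

    define <8 x i32> @xvadd_w_demo(<8 x i32> %a, <8 x i32> %b) {
    entry:
      ; With this patch, the intrinsic call is combined into a plain vector
      ; ISD::ADD on v8i32 and should then select to the xvadd.w instruction.
      %r = call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> %a, <8 x i32> %b)
      ret <8 x i32> %r
    }
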
- -Depends on D155829 - -Reviewed By: SixWeining - -Differential Revision: https://reviews.llvm.org/D155830 - -(cherry picked from commit 691f0d00b84f6ecaf8e341ef38256e939cca6b1e) ---- - llvm/include/llvm/IR/IntrinsicsLoongArch.td | 523 +++++++++++++ - .../LoongArch/LoongArchISelLowering.cpp | 402 +++++++++- - .../Target/LoongArch/LoongArchInstrInfo.cpp | 12 + - .../LoongArch/LoongArchLASXInstrInfo.td | 702 ++++++++++++++++++ - 4 files changed, 1633 insertions(+), 6 deletions(-) - -diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td -index d39d8261ebe3..685deaec7709 100644 ---- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td -+++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td -@@ -647,3 +647,526 @@ def int_loongarch_lsx_vstelm_d - [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; - - } // TargetPrefix = "loongarch" -+ -+//===----------------------------------------------------------------------===// -+// LASX -+ -+let TargetPrefix = "loongarch" in { -+foreach inst = ["xvadd_b", "xvsub_b", -+ "xvsadd_b", "xvsadd_bu", "xvssub_b", "xvssub_bu", -+ "xvavg_b", "xvavg_bu", "xvavgr_b", "xvavgr_bu", -+ "xvabsd_b", "xvabsd_bu", "xvadda_b", -+ "xvmax_b", "xvmax_bu", "xvmin_b", "xvmin_bu", -+ "xvmul_b", "xvmuh_b", "xvmuh_bu", -+ "xvdiv_b", "xvdiv_bu", "xvmod_b", "xvmod_bu", "xvsigncov_b", -+ "xvand_v", "xvor_v", "xvxor_v", "xvnor_v", "xvandn_v", "xvorn_v", -+ "xvsll_b", "xvsrl_b", "xvsra_b", "xvrotr_b", "xvsrlr_b", "xvsrar_b", -+ "xvbitclr_b", "xvbitset_b", "xvbitrev_b", -+ "xvseq_b", "xvsle_b", "xvsle_bu", "xvslt_b", "xvslt_bu", -+ "xvpackev_b", "xvpackod_b", "xvpickev_b", "xvpickod_b", -+ "xvilvl_b", "xvilvh_b"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v32i8_ty], -+ [llvm_v32i8_ty, llvm_v32i8_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvadd_h", "xvsub_h", -+ "xvsadd_h", "xvsadd_hu", "xvssub_h", "xvssub_hu", -+ "xvavg_h", "xvavg_hu", "xvavgr_h", "xvavgr_hu", -+ "xvabsd_h", "xvabsd_hu", "xvadda_h", -+ "xvmax_h", "xvmax_hu", "xvmin_h", "xvmin_hu", -+ "xvmul_h", "xvmuh_h", "xvmuh_hu", -+ "xvdiv_h", "xvdiv_hu", "xvmod_h", "xvmod_hu", "xvsigncov_h", -+ "xvsll_h", "xvsrl_h", "xvsra_h", "xvrotr_h", "xvsrlr_h", "xvsrar_h", -+ "xvbitclr_h", "xvbitset_h", "xvbitrev_h", -+ "xvseq_h", "xvsle_h", "xvsle_hu", "xvslt_h", "xvslt_hu", -+ "xvpackev_h", "xvpackod_h", "xvpickev_h", "xvpickod_h", -+ "xvilvl_h", "xvilvh_h"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], -+ [llvm_v16i16_ty, llvm_v16i16_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvadd_w", "xvsub_w", -+ "xvsadd_w", "xvsadd_wu", "xvssub_w", "xvssub_wu", -+ "xvavg_w", "xvavg_wu", "xvavgr_w", "xvavgr_wu", -+ "xvabsd_w", "xvabsd_wu", "xvadda_w", -+ "xvmax_w", "xvmax_wu", "xvmin_w", "xvmin_wu", -+ "xvmul_w", "xvmuh_w", "xvmuh_wu", -+ "xvdiv_w", "xvdiv_wu", "xvmod_w", "xvmod_wu", "xvsigncov_w", -+ "xvsll_w", "xvsrl_w", "xvsra_w", "xvrotr_w", "xvsrlr_w", "xvsrar_w", -+ "xvbitclr_w", "xvbitset_w", "xvbitrev_w", -+ "xvseq_w", "xvsle_w", "xvsle_wu", "xvslt_w", "xvslt_wu", -+ "xvpackev_w", "xvpackod_w", "xvpickev_w", "xvpickod_w", -+ "xvilvl_w", "xvilvh_w", "xvperm_w"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], -+ [llvm_v8i32_ty, llvm_v8i32_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvadd_d", "xvadd_q", "xvsub_d", "xvsub_q", -+ "xvsadd_d", "xvsadd_du", "xvssub_d", "xvssub_du", -+ "xvhaddw_q_d", "xvhaddw_qu_du", "xvhsubw_q_d", "xvhsubw_qu_du", -+ "xvaddwev_q_d", "xvaddwod_q_d", "xvsubwev_q_d", "xvsubwod_q_d", -+ "xvaddwev_q_du", "xvaddwod_q_du", "xvsubwev_q_du", 
"xvsubwod_q_du", -+ "xvaddwev_q_du_d", "xvaddwod_q_du_d", -+ "xvavg_d", "xvavg_du", "xvavgr_d", "xvavgr_du", -+ "xvabsd_d", "xvabsd_du", "xvadda_d", -+ "xvmax_d", "xvmax_du", "xvmin_d", "xvmin_du", -+ "xvmul_d", "xvmuh_d", "xvmuh_du", -+ "xvmulwev_q_d", "xvmulwod_q_d", "xvmulwev_q_du", "xvmulwod_q_du", -+ "xvmulwev_q_du_d", "xvmulwod_q_du_d", -+ "xvdiv_d", "xvdiv_du", "xvmod_d", "xvmod_du", "xvsigncov_d", -+ "xvsll_d", "xvsrl_d", "xvsra_d", "xvrotr_d", "xvsrlr_d", "xvsrar_d", -+ "xvbitclr_d", "xvbitset_d", "xvbitrev_d", -+ "xvseq_d", "xvsle_d", "xvsle_du", "xvslt_d", "xvslt_du", -+ "xvpackev_d", "xvpackod_d", "xvpickev_d", "xvpickod_d", -+ "xvilvl_d", "xvilvh_d"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], -+ [llvm_v4i64_ty, llvm_v4i64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvaddi_bu", "xvsubi_bu", -+ "xvmaxi_b", "xvmaxi_bu", "xvmini_b", "xvmini_bu", -+ "xvsat_b", "xvsat_bu", -+ "xvandi_b", "xvori_b", "xvxori_b", "xvnori_b", -+ "xvslli_b", "xvsrli_b", "xvsrai_b", "xvrotri_b", -+ "xvsrlri_b", "xvsrari_b", -+ "xvbitclri_b", "xvbitseti_b", "xvbitrevi_b", -+ "xvseqi_b", "xvslei_b", "xvslei_bu", "xvslti_b", "xvslti_bu", -+ "xvrepl128vei_b", "xvbsll_v", "xvbsrl_v", "xvshuf4i_b"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v32i8_ty], -+ [llvm_v32i8_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["xvaddi_hu", "xvsubi_hu", -+ "xvmaxi_h", "xvmaxi_hu", "xvmini_h", "xvmini_hu", -+ "xvsat_h", "xvsat_hu", -+ "xvslli_h", "xvsrli_h", "xvsrai_h", "xvrotri_h", -+ "xvsrlri_h", "xvsrari_h", -+ "xvbitclri_h", "xvbitseti_h", "xvbitrevi_h", -+ "xvseqi_h", "xvslei_h", "xvslei_hu", "xvslti_h", "xvslti_hu", -+ "xvrepl128vei_h", "xvshuf4i_h"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], -+ [llvm_v16i16_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["xvaddi_wu", "xvsubi_wu", -+ "xvmaxi_w", "xvmaxi_wu", "xvmini_w", "xvmini_wu", -+ "xvsat_w", "xvsat_wu", -+ "xvslli_w", "xvsrli_w", "xvsrai_w", "xvrotri_w", -+ "xvsrlri_w", "xvsrari_w", -+ "xvbitclri_w", "xvbitseti_w", "xvbitrevi_w", -+ "xvseqi_w", "xvslei_w", "xvslei_wu", "xvslti_w", "xvslti_wu", -+ "xvrepl128vei_w", "xvshuf4i_w", "xvpickve_w"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], -+ [llvm_v8i32_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["xvaddi_du", "xvsubi_du", -+ "xvmaxi_d", "xvmaxi_du", "xvmini_d", "xvmini_du", -+ "xvsat_d", "xvsat_du", -+ "xvslli_d", "xvsrli_d", "xvsrai_d", "xvrotri_d", -+ "xvsrlri_d", "xvsrari_d", -+ "xvbitclri_d", "xvbitseti_d", "xvbitrevi_d", -+ "xvseqi_d", "xvslei_d", "xvslei_du", "xvslti_d", "xvslti_du", -+ "xvrepl128vei_d", "xvpermi_d", "xvpickve_d"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], -+ [llvm_v4i64_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+ -+foreach inst = ["xvhaddw_h_b", "xvhaddw_hu_bu", "xvhsubw_h_b", "xvhsubw_hu_bu", -+ "xvaddwev_h_b", "xvaddwod_h_b", "xvsubwev_h_b", "xvsubwod_h_b", -+ "xvaddwev_h_bu", "xvaddwod_h_bu", "xvsubwev_h_bu", "xvsubwod_h_bu", -+ "xvaddwev_h_bu_b", "xvaddwod_h_bu_b", -+ "xvmulwev_h_b", "xvmulwod_h_b", "xvmulwev_h_bu", "xvmulwod_h_bu", -+ "xvmulwev_h_bu_b", "xvmulwod_h_bu_b"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], -+ [llvm_v32i8_ty, llvm_v32i8_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvhaddw_w_h", "xvhaddw_wu_hu", "xvhsubw_w_h", "xvhsubw_wu_hu", -+ "xvaddwev_w_h", "xvaddwod_w_h", "xvsubwev_w_h", "xvsubwod_w_h", -+ "xvaddwev_w_hu", "xvaddwod_w_hu", "xvsubwev_w_hu", "xvsubwod_w_hu", -+ "xvaddwev_w_hu_h", "xvaddwod_w_hu_h", -+ "xvmulwev_w_h", 
"xvmulwod_w_h", "xvmulwev_w_hu", "xvmulwod_w_hu", -+ "xvmulwev_w_hu_h", "xvmulwod_w_hu_h"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], -+ [llvm_v16i16_ty, llvm_v16i16_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvhaddw_d_w", "xvhaddw_du_wu", "xvhsubw_d_w", "xvhsubw_du_wu", -+ "xvaddwev_d_w", "xvaddwod_d_w", "xvsubwev_d_w", "xvsubwod_d_w", -+ "xvaddwev_d_wu", "xvaddwod_d_wu", "xvsubwev_d_wu", "xvsubwod_d_wu", -+ "xvaddwev_d_wu_w", "xvaddwod_d_wu_w", -+ "xvmulwev_d_w", "xvmulwod_d_w", "xvmulwev_d_wu", "xvmulwod_d_wu", -+ "xvmulwev_d_wu_w", "xvmulwod_d_wu_w"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], -+ [llvm_v8i32_ty, llvm_v8i32_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvsrln_b_h", "xvsran_b_h", "xvsrlrn_b_h", "xvsrarn_b_h", -+ "xvssrln_b_h", "xvssran_b_h", "xvssrln_bu_h", "xvssran_bu_h", -+ "xvssrlrn_b_h", "xvssrarn_b_h", "xvssrlrn_bu_h", "xvssrarn_bu_h"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v32i8_ty], -+ [llvm_v16i16_ty, llvm_v16i16_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvsrln_h_w", "xvsran_h_w", "xvsrlrn_h_w", "xvsrarn_h_w", -+ "xvssrln_h_w", "xvssran_h_w", "xvssrln_hu_w", "xvssran_hu_w", -+ "xvssrlrn_h_w", "xvssrarn_h_w", "xvssrlrn_hu_w", "xvssrarn_hu_w"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], -+ [llvm_v8i32_ty, llvm_v8i32_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvsrln_w_d", "xvsran_w_d", "xvsrlrn_w_d", "xvsrarn_w_d", -+ "xvssrln_w_d", "xvssran_w_d", "xvssrln_wu_d", "xvssran_wu_d", -+ "xvssrlrn_w_d", "xvssrarn_w_d", "xvssrlrn_wu_d", "xvssrarn_wu_d"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], -+ [llvm_v4i64_ty, llvm_v4i64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvmadd_b", "xvmsub_b", "xvfrstp_b", "xvbitsel_v", "xvshuf_b"] in -+ def int_loongarch_lasx_#inst -+ : VecInt<[llvm_v32i8_ty], -+ [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvmadd_h", "xvmsub_h", "xvfrstp_h", "xvshuf_h"] in -+ def int_loongarch_lasx_#inst -+ : VecInt<[llvm_v16i16_ty], -+ [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvmadd_w", "xvmsub_w", "xvshuf_w"] in -+ def int_loongarch_lasx_#inst -+ : VecInt<[llvm_v8i32_ty], -+ [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvmadd_d", "xvmsub_d", "xvshuf_d"] in -+ def int_loongarch_lasx_#inst -+ : VecInt<[llvm_v4i64_ty], -+ [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvsrlni_b_h", "xvsrani_b_h", "xvsrlrni_b_h", "xvsrarni_b_h", -+ "xvssrlni_b_h", "xvssrani_b_h", "xvssrlni_bu_h", "xvssrani_bu_h", -+ "xvssrlrni_b_h", "xvssrarni_b_h", "xvssrlrni_bu_h", "xvssrarni_bu_h", -+ "xvfrstpi_b", "xvbitseli_b", "xvextrins_b", "xvpermi_q"] in -+ def int_loongarch_lasx_#inst -+ : VecInt<[llvm_v32i8_ty], -+ [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["xvsrlni_h_w", "xvsrani_h_w", "xvsrlrni_h_w", "xvsrarni_h_w", -+ "xvssrlni_h_w", "xvssrani_h_w", "xvssrlni_hu_w", "xvssrani_hu_w", -+ "xvssrlrni_h_w", "xvssrarni_h_w", "xvssrlrni_hu_w", "xvssrarni_hu_w", -+ "xvfrstpi_h", "xvextrins_h"] in -+ def int_loongarch_lasx_#inst -+ : VecInt<[llvm_v16i16_ty], -+ [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["xvsrlni_w_d", "xvsrani_w_d", "xvsrlrni_w_d", "xvsrarni_w_d", -+ "xvssrlni_w_d", "xvssrani_w_d", "xvssrlni_wu_d", "xvssrani_wu_d", -+ "xvssrlrni_w_d", "xvssrarni_w_d", "xvssrlrni_wu_d", "xvssrarni_wu_d", -+ "xvpermi_w", "xvextrins_w", "xvinsve0_w"] in -+ 
def int_loongarch_lasx_#inst -+ : VecInt<[llvm_v8i32_ty], -+ [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["xvsrlni_d_q", "xvsrani_d_q", "xvsrlrni_d_q", "xvsrarni_d_q", -+ "xvssrlni_d_q", "xvssrani_d_q", "xvssrlni_du_q", "xvssrani_du_q", -+ "xvssrlrni_d_q", "xvssrarni_d_q", "xvssrlrni_du_q", "xvssrarni_du_q", -+ "xvshuf4i_d", "xvextrins_d", "xvinsve0_d"] in -+ def int_loongarch_lasx_#inst -+ : VecInt<[llvm_v4i64_ty], -+ [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+ -+foreach inst = ["xvmaddwev_h_b", "xvmaddwod_h_b", "xvmaddwev_h_bu", -+ "xvmaddwod_h_bu", "xvmaddwev_h_bu_b", "xvmaddwod_h_bu_b"] in -+ def int_loongarch_lasx_#inst -+ : VecInt<[llvm_v16i16_ty], -+ [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvmaddwev_w_h", "xvmaddwod_w_h", "xvmaddwev_w_hu", -+ "xvmaddwod_w_hu", "xvmaddwev_w_hu_h", "xvmaddwod_w_hu_h"] in -+ def int_loongarch_lasx_#inst -+ : VecInt<[llvm_v8i32_ty], -+ [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvmaddwev_d_w", "xvmaddwod_d_w", "xvmaddwev_d_wu", -+ "xvmaddwod_d_wu", "xvmaddwev_d_wu_w", "xvmaddwod_d_wu_w"] in -+ def int_loongarch_lasx_#inst -+ : VecInt<[llvm_v4i64_ty], -+ [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvmaddwev_q_d", "xvmaddwod_q_d", "xvmaddwev_q_du", -+ "xvmaddwod_q_du", "xvmaddwev_q_du_d", "xvmaddwod_q_du_d"] in -+ def int_loongarch_lasx_#inst -+ : VecInt<[llvm_v4i64_ty], -+ [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvsllwil_h_b", "xvsllwil_hu_bu"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], -+ [llvm_v32i8_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["xvsllwil_w_h", "xvsllwil_wu_hu"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], -+ [llvm_v16i16_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["xvsllwil_d_w", "xvsllwil_du_wu"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], -+ [llvm_v8i32_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+ -+foreach inst = ["xvneg_b", "xvmskltz_b", "xvmskgez_b", "xvmsknz_b", -+ "xvclo_b", "xvclz_b", "xvpcnt_b", -+ "xvreplve0_b", "xvreplve0_q"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v32i8_ty], [llvm_v32i8_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvneg_h", "xvmskltz_h", "xvclo_h", "xvclz_h", "xvpcnt_h", -+ "xvreplve0_h"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], [llvm_v16i16_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvneg_w", "xvmskltz_w", "xvclo_w", "xvclz_w", "xvpcnt_w", -+ "xvreplve0_w"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], [llvm_v8i32_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvneg_d", "xvexth_q_d", "xvexth_qu_du", "xvmskltz_d", -+ "xvextl_q_d", "xvextl_qu_du", "xvclo_d", "xvclz_d", "xvpcnt_d", -+ "xvreplve0_d"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v4i64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvexth_h_b", "xvexth_hu_bu", "vext2xv_h_b", "vext2xv_hu_bu"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], [llvm_v32i8_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvexth_w_h", "xvexth_wu_hu", "vext2xv_w_h", "vext2xv_wu_hu"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], [llvm_v16i16_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvexth_d_w", "xvexth_du_wu", "vext2xv_d_w", "vext2xv_du_wu"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v8i32_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vext2xv_w_b", 
"vext2xv_wu_bu"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], [llvm_v32i8_ty], -+ [IntrNoMem]>; -+foreach inst = ["vext2xv_d_h", "vext2xv_du_hu"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v16i16_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vext2xv_d_b", "vext2xv_du_bu"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v32i8_ty], -+ [IntrNoMem]>; -+ -+def int_loongarch_lasx_xvldi : VecInt<[llvm_v4i64_ty], [llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+def int_loongarch_lasx_xvrepli_b : VecInt<[llvm_v32i8_ty], [llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+def int_loongarch_lasx_xvrepli_h : VecInt<[llvm_v16i16_ty], [llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+def int_loongarch_lasx_xvrepli_w : VecInt<[llvm_v8i32_ty], [llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+def int_loongarch_lasx_xvrepli_d : VecInt<[llvm_v4i64_ty], [llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+ -+def int_loongarch_lasx_xvreplgr2vr_b : VecInt<[llvm_v32i8_ty], [llvm_i32_ty], -+ [IntrNoMem]>; -+def int_loongarch_lasx_xvreplgr2vr_h : VecInt<[llvm_v16i16_ty], [llvm_i32_ty], -+ [IntrNoMem]>; -+def int_loongarch_lasx_xvreplgr2vr_w : VecInt<[llvm_v8i32_ty], [llvm_i32_ty], -+ [IntrNoMem]>; -+def int_loongarch_lasx_xvreplgr2vr_d : VecInt<[llvm_v4i64_ty], [llvm_i64_ty], -+ [IntrNoMem]>; -+ -+def int_loongarch_lasx_xvinsgr2vr_w -+ : VecInt<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+def int_loongarch_lasx_xvinsgr2vr_d -+ : VecInt<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+ -+def int_loongarch_lasx_xvreplve_b -+ : VecInt<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; -+def int_loongarch_lasx_xvreplve_h -+ : VecInt<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; -+def int_loongarch_lasx_xvreplve_w -+ : VecInt<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; -+def int_loongarch_lasx_xvreplve_d -+ : VecInt<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; -+ -+foreach inst = ["xvpickve2gr_w", "xvpickve2gr_wu" ] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_i32_ty], -+ [llvm_v8i32_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["xvpickve2gr_d", "xvpickve2gr_du" ] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_i64_ty], -+ [llvm_v4i64_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+ -+def int_loongarch_lasx_xbz_b : VecInt<[llvm_i32_ty], [llvm_v32i8_ty], -+ [IntrNoMem]>; -+def int_loongarch_lasx_xbz_h : VecInt<[llvm_i32_ty], [llvm_v16i16_ty], -+ [IntrNoMem]>; -+def int_loongarch_lasx_xbz_w : VecInt<[llvm_i32_ty], [llvm_v8i32_ty], -+ [IntrNoMem]>; -+def int_loongarch_lasx_xbz_d : VecInt<[llvm_i32_ty], [llvm_v4i64_ty], -+ [IntrNoMem]>; -+def int_loongarch_lasx_xbz_v : VecInt<[llvm_i32_ty], [llvm_v32i8_ty], -+ [IntrNoMem]>; -+ -+def int_loongarch_lasx_xbnz_v : VecInt<[llvm_i32_ty], [llvm_v32i8_ty], -+ [IntrNoMem]>; -+def int_loongarch_lasx_xbnz_b : VecInt<[llvm_i32_ty], [llvm_v32i8_ty], -+ [IntrNoMem]>; -+def int_loongarch_lasx_xbnz_h : VecInt<[llvm_i32_ty], [llvm_v16i16_ty], -+ [IntrNoMem]>; -+def int_loongarch_lasx_xbnz_w : VecInt<[llvm_i32_ty], [llvm_v8i32_ty], -+ [IntrNoMem]>; -+def int_loongarch_lasx_xbnz_d : VecInt<[llvm_i32_ty], [llvm_v4i64_ty], -+ [IntrNoMem]>; -+ -+// LASX Float -+ -+foreach inst = ["xvfadd_s", "xvfsub_s", "xvfmul_s", "xvfdiv_s", -+ "xvfmax_s", "xvfmin_s", "xvfmaxa_s", "xvfmina_s"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], -+ [llvm_v8f32_ty, llvm_v8f32_ty], -+ [IntrNoMem]>; -+foreach inst = 
["xvfadd_d", "xvfsub_d", "xvfmul_d", "xvfdiv_d", -+ "xvfmax_d", "xvfmin_d", "xvfmaxa_d", "xvfmina_d"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v4f64_ty], -+ [llvm_v4f64_ty, llvm_v4f64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvfmadd_s", "xvfmsub_s", "xvfnmadd_s", "xvfnmsub_s"] in -+ def int_loongarch_lasx_#inst -+ : VecInt<[llvm_v8f32_ty], -+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvfmadd_d", "xvfmsub_d", "xvfnmadd_d", "xvfnmsub_d"] in -+ def int_loongarch_lasx_#inst -+ : VecInt<[llvm_v4f64_ty], -+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvflogb_s", "xvfsqrt_s", "xvfrecip_s", "xvfrsqrt_s", "xvfrint_s", -+ "xvfrintrne_s", "xvfrintrz_s", "xvfrintrp_s", "xvfrintrm_s"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], [llvm_v8f32_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvflogb_d", "xvfsqrt_d", "xvfrecip_d", "xvfrsqrt_d", "xvfrint_d", -+ "xvfrintrne_d", "xvfrintrz_d", "xvfrintrp_d", "xvfrintrm_d"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v4f64_ty], [llvm_v4f64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvfcvtl_s_h", "xvfcvth_s_h"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], [llvm_v16i16_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvfcvtl_d_s", "xvfcvth_d_s"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v4f64_ty], [llvm_v8f32_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvftintrne_w_s", "xvftintrz_w_s", "xvftintrp_w_s", "xvftintrm_w_s", -+ "xvftint_w_s", "xvftintrz_wu_s", "xvftint_wu_s", "xvfclass_s"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], [llvm_v8f32_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvftintrne_l_d", "xvftintrz_l_d", "xvftintrp_l_d", "xvftintrm_l_d", -+ "xvftint_l_d", "xvftintrz_lu_d", "xvftint_lu_d", "xvfclass_d"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v4f64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvftintrnel_l_s", "xvftintrneh_l_s", "xvftintrzl_l_s", -+ "xvftintrzh_l_s", "xvftintrpl_l_s", "xvftintrph_l_s", -+ "xvftintrml_l_s", "xvftintrmh_l_s", "xvftintl_l_s", -+ "xvftinth_l_s"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v8f32_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvffint_s_w", "xvffint_s_wu"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], [llvm_v8i32_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvffint_d_l", "xvffint_d_lu"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v4f64_ty], [llvm_v4i64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvffintl_d_w", "xvffinth_d_w"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v4f64_ty], [llvm_v8i32_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvffint_s_l"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], -+ [llvm_v4i64_ty, llvm_v4i64_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvftintrne_w_d", "xvftintrz_w_d", "xvftintrp_w_d", "xvftintrm_w_d", -+ "xvftint_w_d"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], -+ [llvm_v4f64_ty, llvm_v4f64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvfcvt_h_s"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], -+ [llvm_v8f32_ty, llvm_v8f32_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvfcvt_s_d"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], -+ [llvm_v4f64_ty, llvm_v4f64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvfcmp_caf_s", "xvfcmp_cun_s", "xvfcmp_ceq_s", "xvfcmp_cueq_s", -+ "xvfcmp_clt_s", "xvfcmp_cult_s", "xvfcmp_cle_s", "xvfcmp_cule_s", -+ "xvfcmp_cne_s", "xvfcmp_cor_s", "xvfcmp_cune_s", -+ "xvfcmp_saf_s", "xvfcmp_sun_s", 
"xvfcmp_seq_s", "xvfcmp_sueq_s", -+ "xvfcmp_slt_s", "xvfcmp_sult_s", "xvfcmp_sle_s", "xvfcmp_sule_s", -+ "xvfcmp_sne_s", "xvfcmp_sor_s", "xvfcmp_sune_s"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], -+ [llvm_v8f32_ty, llvm_v8f32_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvfcmp_caf_d", "xvfcmp_cun_d", "xvfcmp_ceq_d", "xvfcmp_cueq_d", -+ "xvfcmp_clt_d", "xvfcmp_cult_d", "xvfcmp_cle_d", "xvfcmp_cule_d", -+ "xvfcmp_cne_d", "xvfcmp_cor_d", "xvfcmp_cune_d", -+ "xvfcmp_saf_d", "xvfcmp_sun_d", "xvfcmp_seq_d", "xvfcmp_sueq_d", -+ "xvfcmp_slt_d", "xvfcmp_sult_d", "xvfcmp_sle_d", "xvfcmp_sule_d", -+ "xvfcmp_sne_d", "xvfcmp_sor_d", "xvfcmp_sune_d"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], -+ [llvm_v4f64_ty, llvm_v4f64_ty], -+ [IntrNoMem]>; -+ -+def int_loongarch_lasx_xvpickve_w_f -+ : VecInt<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+def int_loongarch_lasx_xvpickve_d_f -+ : VecInt<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+ -+// LASX load/store -+def int_loongarch_lasx_xvld -+ : VecInt<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i32_ty], -+ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; -+def int_loongarch_lasx_xvldx -+ : VecInt<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i64_ty], -+ [IntrReadMem, IntrArgMemOnly]>; -+def int_loongarch_lasx_xvldrepl_b -+ : VecInt<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i32_ty], -+ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; -+def int_loongarch_lasx_xvldrepl_h -+ : VecInt<[llvm_v16i16_ty], [llvm_ptr_ty, llvm_i32_ty], -+ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; -+def int_loongarch_lasx_xvldrepl_w -+ : VecInt<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_i32_ty], -+ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; -+def int_loongarch_lasx_xvldrepl_d -+ : VecInt<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_i32_ty], -+ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; -+ -+def int_loongarch_lasx_xvst -+ : VecInt<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i32_ty], -+ [IntrWriteMem, IntrArgMemOnly, ImmArg>]>; -+def int_loongarch_lasx_xvstx -+ : VecInt<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i64_ty], -+ [IntrWriteMem, IntrArgMemOnly]>; -+def int_loongarch_lasx_xvstelm_b -+ : VecInt<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], -+ [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; -+def int_loongarch_lasx_xvstelm_h -+ : VecInt<[], [llvm_v16i16_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], -+ [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; -+def int_loongarch_lasx_xvstelm_w -+ : VecInt<[], [llvm_v8i32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], -+ [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; -+def int_loongarch_lasx_xvstelm_d -+ : VecInt<[], [llvm_v4i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], -+ [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; -+} // TargetPrefix = "loongarch" -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index c05133647929..3a40cd06a3eb 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -64,11 +64,17 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - - static const MVT::SimpleValueType LSXVTs[] = { - MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64}; -+ static const MVT::SimpleValueType LASXVTs[] = { -+ MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64}; - - if (Subtarget.hasExtLSX()) - for (MVT VT : LSXVTs) - addRegisterClass(VT, &LoongArch::LSX128RegClass); - -+ if 
(Subtarget.hasExtLASX()) -+ for (MVT VT : LASXVTs) -+ addRegisterClass(VT, &LoongArch::LASX256RegClass); -+ - setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT, - MVT::i1, Promote); - -@@ -207,6 +213,11 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, - {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8}, Legal); - -+ if (Subtarget.hasExtLASX()) -+ setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, -+ {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}, -+ Legal); -+ - // Compute derived properties from the register classes. - computeRegisterProperties(Subtarget.getRegisterInfo()); - -@@ -695,9 +706,17 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, - case Intrinsic::loongarch_lsx_vpickve2gr_d: - case Intrinsic::loongarch_lsx_vpickve2gr_du: - case Intrinsic::loongarch_lsx_vreplvei_d: -+ case Intrinsic::loongarch_lasx_xvrepl128vei_d: - return checkIntrinsicImmArg<1>(Op, 2, DAG); - case Intrinsic::loongarch_lsx_vreplvei_w: -+ case Intrinsic::loongarch_lasx_xvrepl128vei_w: -+ case Intrinsic::loongarch_lasx_xvpickve2gr_d: -+ case Intrinsic::loongarch_lasx_xvpickve2gr_du: -+ case Intrinsic::loongarch_lasx_xvpickve_d: -+ case Intrinsic::loongarch_lasx_xvpickve_d_f: - return checkIntrinsicImmArg<2>(Op, 2, DAG); -+ case Intrinsic::loongarch_lasx_xvinsve0_d: -+ return checkIntrinsicImmArg<2>(Op, 3, DAG); - case Intrinsic::loongarch_lsx_vsat_b: - case Intrinsic::loongarch_lsx_vsat_bu: - case Intrinsic::loongarch_lsx_vrotri_b: -@@ -706,7 +725,19 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, - case Intrinsic::loongarch_lsx_vsrlri_b: - case Intrinsic::loongarch_lsx_vsrari_b: - case Intrinsic::loongarch_lsx_vreplvei_h: -+ case Intrinsic::loongarch_lasx_xvsat_b: -+ case Intrinsic::loongarch_lasx_xvsat_bu: -+ case Intrinsic::loongarch_lasx_xvrotri_b: -+ case Intrinsic::loongarch_lasx_xvsllwil_h_b: -+ case Intrinsic::loongarch_lasx_xvsllwil_hu_bu: -+ case Intrinsic::loongarch_lasx_xvsrlri_b: -+ case Intrinsic::loongarch_lasx_xvsrari_b: -+ case Intrinsic::loongarch_lasx_xvrepl128vei_h: -+ case Intrinsic::loongarch_lasx_xvpickve_w: -+ case Intrinsic::loongarch_lasx_xvpickve_w_f: - return checkIntrinsicImmArg<3>(Op, 2, DAG); -+ case Intrinsic::loongarch_lasx_xvinsve0_w: -+ return checkIntrinsicImmArg<3>(Op, 3, DAG); - case Intrinsic::loongarch_lsx_vsat_h: - case Intrinsic::loongarch_lsx_vsat_hu: - case Intrinsic::loongarch_lsx_vrotri_h: -@@ -715,6 +746,14 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, - case Intrinsic::loongarch_lsx_vsrlri_h: - case Intrinsic::loongarch_lsx_vsrari_h: - case Intrinsic::loongarch_lsx_vreplvei_b: -+ case Intrinsic::loongarch_lasx_xvsat_h: -+ case Intrinsic::loongarch_lasx_xvsat_hu: -+ case Intrinsic::loongarch_lasx_xvrotri_h: -+ case Intrinsic::loongarch_lasx_xvsllwil_w_h: -+ case Intrinsic::loongarch_lasx_xvsllwil_wu_hu: -+ case Intrinsic::loongarch_lasx_xvsrlri_h: -+ case Intrinsic::loongarch_lasx_xvsrari_h: -+ case Intrinsic::loongarch_lasx_xvrepl128vei_b: - return checkIntrinsicImmArg<4>(Op, 2, DAG); - case Intrinsic::loongarch_lsx_vsrlni_b_h: - case Intrinsic::loongarch_lsx_vsrani_b_h: -@@ -728,6 +767,18 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, - case Intrinsic::loongarch_lsx_vssrarni_b_h: - case Intrinsic::loongarch_lsx_vssrlrni_bu_h: - case Intrinsic::loongarch_lsx_vssrarni_bu_h: -+ case Intrinsic::loongarch_lasx_xvsrlni_b_h: -+ case Intrinsic::loongarch_lasx_xvsrani_b_h: -+ 
case Intrinsic::loongarch_lasx_xvsrlrni_b_h: -+ case Intrinsic::loongarch_lasx_xvsrarni_b_h: -+ case Intrinsic::loongarch_lasx_xvssrlni_b_h: -+ case Intrinsic::loongarch_lasx_xvssrani_b_h: -+ case Intrinsic::loongarch_lasx_xvssrlni_bu_h: -+ case Intrinsic::loongarch_lasx_xvssrani_bu_h: -+ case Intrinsic::loongarch_lasx_xvssrlrni_b_h: -+ case Intrinsic::loongarch_lasx_xvssrarni_b_h: -+ case Intrinsic::loongarch_lasx_xvssrlrni_bu_h: -+ case Intrinsic::loongarch_lasx_xvssrarni_bu_h: - return checkIntrinsicImmArg<4>(Op, 3, DAG); - case Intrinsic::loongarch_lsx_vsat_w: - case Intrinsic::loongarch_lsx_vsat_wu: -@@ -746,6 +797,23 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, - case Intrinsic::loongarch_lsx_vslti_du: - case Intrinsic::loongarch_lsx_vbsll_v: - case Intrinsic::loongarch_lsx_vbsrl_v: -+ case Intrinsic::loongarch_lasx_xvsat_w: -+ case Intrinsic::loongarch_lasx_xvsat_wu: -+ case Intrinsic::loongarch_lasx_xvrotri_w: -+ case Intrinsic::loongarch_lasx_xvsllwil_d_w: -+ case Intrinsic::loongarch_lasx_xvsllwil_du_wu: -+ case Intrinsic::loongarch_lasx_xvsrlri_w: -+ case Intrinsic::loongarch_lasx_xvsrari_w: -+ case Intrinsic::loongarch_lasx_xvslei_bu: -+ case Intrinsic::loongarch_lasx_xvslei_hu: -+ case Intrinsic::loongarch_lasx_xvslei_wu: -+ case Intrinsic::loongarch_lasx_xvslei_du: -+ case Intrinsic::loongarch_lasx_xvslti_bu: -+ case Intrinsic::loongarch_lasx_xvslti_hu: -+ case Intrinsic::loongarch_lasx_xvslti_wu: -+ case Intrinsic::loongarch_lasx_xvslti_du: -+ case Intrinsic::loongarch_lasx_xvbsll_v: -+ case Intrinsic::loongarch_lasx_xvbsrl_v: - return checkIntrinsicImmArg<5>(Op, 2, DAG); - case Intrinsic::loongarch_lsx_vseqi_b: - case Intrinsic::loongarch_lsx_vseqi_h: -@@ -759,6 +827,18 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, - case Intrinsic::loongarch_lsx_vslti_h: - case Intrinsic::loongarch_lsx_vslti_w: - case Intrinsic::loongarch_lsx_vslti_d: -+ case Intrinsic::loongarch_lasx_xvseqi_b: -+ case Intrinsic::loongarch_lasx_xvseqi_h: -+ case Intrinsic::loongarch_lasx_xvseqi_w: -+ case Intrinsic::loongarch_lasx_xvseqi_d: -+ case Intrinsic::loongarch_lasx_xvslei_b: -+ case Intrinsic::loongarch_lasx_xvslei_h: -+ case Intrinsic::loongarch_lasx_xvslei_w: -+ case Intrinsic::loongarch_lasx_xvslei_d: -+ case Intrinsic::loongarch_lasx_xvslti_b: -+ case Intrinsic::loongarch_lasx_xvslti_h: -+ case Intrinsic::loongarch_lasx_xvslti_w: -+ case Intrinsic::loongarch_lasx_xvslti_d: - return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true); - case Intrinsic::loongarch_lsx_vsrlni_h_w: - case Intrinsic::loongarch_lsx_vsrani_h_w: -@@ -774,12 +854,31 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, - case Intrinsic::loongarch_lsx_vssrarni_hu_w: - case Intrinsic::loongarch_lsx_vfrstpi_b: - case Intrinsic::loongarch_lsx_vfrstpi_h: -+ case Intrinsic::loongarch_lasx_xvsrlni_h_w: -+ case Intrinsic::loongarch_lasx_xvsrani_h_w: -+ case Intrinsic::loongarch_lasx_xvsrlrni_h_w: -+ case Intrinsic::loongarch_lasx_xvsrarni_h_w: -+ case Intrinsic::loongarch_lasx_xvssrlni_h_w: -+ case Intrinsic::loongarch_lasx_xvssrani_h_w: -+ case Intrinsic::loongarch_lasx_xvssrlni_hu_w: -+ case Intrinsic::loongarch_lasx_xvssrani_hu_w: -+ case Intrinsic::loongarch_lasx_xvssrlrni_h_w: -+ case Intrinsic::loongarch_lasx_xvssrarni_h_w: -+ case Intrinsic::loongarch_lasx_xvssrlrni_hu_w: -+ case Intrinsic::loongarch_lasx_xvssrarni_hu_w: -+ case Intrinsic::loongarch_lasx_xvfrstpi_b: -+ case Intrinsic::loongarch_lasx_xvfrstpi_h: - return checkIntrinsicImmArg<5>(Op, 3, DAG); - case 
Intrinsic::loongarch_lsx_vsat_d: - case Intrinsic::loongarch_lsx_vsat_du: - case Intrinsic::loongarch_lsx_vrotri_d: - case Intrinsic::loongarch_lsx_vsrlri_d: - case Intrinsic::loongarch_lsx_vsrari_d: -+ case Intrinsic::loongarch_lasx_xvsat_d: -+ case Intrinsic::loongarch_lasx_xvsat_du: -+ case Intrinsic::loongarch_lasx_xvrotri_d: -+ case Intrinsic::loongarch_lasx_xvsrlri_d: -+ case Intrinsic::loongarch_lasx_xvsrari_d: - return checkIntrinsicImmArg<6>(Op, 2, DAG); - case Intrinsic::loongarch_lsx_vsrlni_w_d: - case Intrinsic::loongarch_lsx_vsrani_w_d: -@@ -793,6 +892,18 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, - case Intrinsic::loongarch_lsx_vssrarni_w_d: - case Intrinsic::loongarch_lsx_vssrlrni_wu_d: - case Intrinsic::loongarch_lsx_vssrarni_wu_d: -+ case Intrinsic::loongarch_lasx_xvsrlni_w_d: -+ case Intrinsic::loongarch_lasx_xvsrani_w_d: -+ case Intrinsic::loongarch_lasx_xvsrlrni_w_d: -+ case Intrinsic::loongarch_lasx_xvsrarni_w_d: -+ case Intrinsic::loongarch_lasx_xvssrlni_w_d: -+ case Intrinsic::loongarch_lasx_xvssrani_w_d: -+ case Intrinsic::loongarch_lasx_xvssrlni_wu_d: -+ case Intrinsic::loongarch_lasx_xvssrani_wu_d: -+ case Intrinsic::loongarch_lasx_xvssrlrni_w_d: -+ case Intrinsic::loongarch_lasx_xvssrarni_w_d: -+ case Intrinsic::loongarch_lasx_xvssrlrni_wu_d: -+ case Intrinsic::loongarch_lasx_xvssrarni_wu_d: - return checkIntrinsicImmArg<6>(Op, 3, DAG); - case Intrinsic::loongarch_lsx_vsrlni_d_q: - case Intrinsic::loongarch_lsx_vsrani_d_q: -@@ -806,11 +917,28 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, - case Intrinsic::loongarch_lsx_vssrarni_d_q: - case Intrinsic::loongarch_lsx_vssrlrni_du_q: - case Intrinsic::loongarch_lsx_vssrarni_du_q: -+ case Intrinsic::loongarch_lasx_xvsrlni_d_q: -+ case Intrinsic::loongarch_lasx_xvsrani_d_q: -+ case Intrinsic::loongarch_lasx_xvsrlrni_d_q: -+ case Intrinsic::loongarch_lasx_xvsrarni_d_q: -+ case Intrinsic::loongarch_lasx_xvssrlni_d_q: -+ case Intrinsic::loongarch_lasx_xvssrani_d_q: -+ case Intrinsic::loongarch_lasx_xvssrlni_du_q: -+ case Intrinsic::loongarch_lasx_xvssrani_du_q: -+ case Intrinsic::loongarch_lasx_xvssrlrni_d_q: -+ case Intrinsic::loongarch_lasx_xvssrarni_d_q: -+ case Intrinsic::loongarch_lasx_xvssrlrni_du_q: -+ case Intrinsic::loongarch_lasx_xvssrarni_du_q: - return checkIntrinsicImmArg<7>(Op, 3, DAG); - case Intrinsic::loongarch_lsx_vnori_b: - case Intrinsic::loongarch_lsx_vshuf4i_b: - case Intrinsic::loongarch_lsx_vshuf4i_h: - case Intrinsic::loongarch_lsx_vshuf4i_w: -+ case Intrinsic::loongarch_lasx_xvnori_b: -+ case Intrinsic::loongarch_lasx_xvshuf4i_b: -+ case Intrinsic::loongarch_lasx_xvshuf4i_h: -+ case Intrinsic::loongarch_lasx_xvshuf4i_w: -+ case Intrinsic::loongarch_lasx_xvpermi_d: - return checkIntrinsicImmArg<8>(Op, 2, DAG); - case Intrinsic::loongarch_lsx_vshuf4i_d: - case Intrinsic::loongarch_lsx_vpermi_w: -@@ -819,13 +947,26 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, - case Intrinsic::loongarch_lsx_vextrins_h: - case Intrinsic::loongarch_lsx_vextrins_w: - case Intrinsic::loongarch_lsx_vextrins_d: -+ case Intrinsic::loongarch_lasx_xvshuf4i_d: -+ case Intrinsic::loongarch_lasx_xvpermi_w: -+ case Intrinsic::loongarch_lasx_xvpermi_q: -+ case Intrinsic::loongarch_lasx_xvbitseli_b: -+ case Intrinsic::loongarch_lasx_xvextrins_b: -+ case Intrinsic::loongarch_lasx_xvextrins_h: -+ case Intrinsic::loongarch_lasx_xvextrins_w: -+ case Intrinsic::loongarch_lasx_xvextrins_d: - return checkIntrinsicImmArg<8>(Op, 3, DAG); - case Intrinsic::loongarch_lsx_vrepli_b: - 
case Intrinsic::loongarch_lsx_vrepli_h: - case Intrinsic::loongarch_lsx_vrepli_w: - case Intrinsic::loongarch_lsx_vrepli_d: -+ case Intrinsic::loongarch_lasx_xvrepli_b: -+ case Intrinsic::loongarch_lasx_xvrepli_h: -+ case Intrinsic::loongarch_lasx_xvrepli_w: -+ case Intrinsic::loongarch_lasx_xvrepli_d: - return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true); - case Intrinsic::loongarch_lsx_vldi: -+ case Intrinsic::loongarch_lasx_xvldi: - return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true); - } - } -@@ -924,22 +1065,27 @@ LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, - } - case Intrinsic::loongarch_lsx_vld: - case Intrinsic::loongarch_lsx_vldrepl_b: -+ case Intrinsic::loongarch_lasx_xvld: -+ case Intrinsic::loongarch_lasx_xvldrepl_b: - return !isInt<12>(cast(Op.getOperand(3))->getSExtValue()) - ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) - : SDValue(); - case Intrinsic::loongarch_lsx_vldrepl_h: -+ case Intrinsic::loongarch_lasx_xvldrepl_h: - return !isShiftedInt<11, 1>( - cast(Op.getOperand(3))->getSExtValue()) - ? emitIntrinsicWithChainErrorMessage( - Op, "argument out of range or not a multiple of 2", DAG) - : SDValue(); - case Intrinsic::loongarch_lsx_vldrepl_w: -+ case Intrinsic::loongarch_lasx_xvldrepl_w: - return !isShiftedInt<10, 2>( - cast(Op.getOperand(3))->getSExtValue()) - ? emitIntrinsicWithChainErrorMessage( - Op, "argument out of range or not a multiple of 4", DAG) - : SDValue(); - case Intrinsic::loongarch_lsx_vldrepl_d: -+ case Intrinsic::loongarch_lasx_xvldrepl_d: - return !isShiftedInt<9, 3>( - cast(Op.getOperand(3))->getSExtValue()) - ? emitIntrinsicWithChainErrorMessage( -@@ -1064,14 +1210,27 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, - : Op; - } - case Intrinsic::loongarch_lsx_vst: -+ case Intrinsic::loongarch_lasx_xvst: - return !isInt<12>(cast(Op.getOperand(4))->getSExtValue()) - ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) - : SDValue(); -+ case Intrinsic::loongarch_lasx_xvstelm_b: -+ return (!isInt<8>(cast(Op.getOperand(4))->getSExtValue()) || -+ !isUInt<5>(cast(Op.getOperand(5))->getZExtValue())) -+ ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) -+ : SDValue(); - case Intrinsic::loongarch_lsx_vstelm_b: - return (!isInt<8>(cast(Op.getOperand(4))->getSExtValue()) || - !isUInt<4>(cast(Op.getOperand(5))->getZExtValue())) - ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) - : SDValue(); -+ case Intrinsic::loongarch_lasx_xvstelm_h: -+ return (!isShiftedInt<8, 1>( -+ cast(Op.getOperand(4))->getSExtValue()) || -+ !isUInt<4>(cast(Op.getOperand(5))->getZExtValue())) -+ ? emitIntrinsicErrorMessage( -+ Op, "argument out of range or not a multiple of 2", DAG) -+ : SDValue(); - case Intrinsic::loongarch_lsx_vstelm_h: - return (!isShiftedInt<8, 1>( - cast(Op.getOperand(4))->getSExtValue()) || -@@ -1079,6 +1238,13 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, - ? emitIntrinsicErrorMessage( - Op, "argument out of range or not a multiple of 2", DAG) - : SDValue(); -+ case Intrinsic::loongarch_lasx_xvstelm_w: -+ return (!isShiftedInt<8, 2>( -+ cast(Op.getOperand(4))->getSExtValue()) || -+ !isUInt<3>(cast(Op.getOperand(5))->getZExtValue())) -+ ? emitIntrinsicErrorMessage( -+ Op, "argument out of range or not a multiple of 4", DAG) -+ : SDValue(); - case Intrinsic::loongarch_lsx_vstelm_w: - return (!isShiftedInt<8, 2>( - cast(Op.getOperand(4))->getSExtValue()) || -@@ -1086,6 +1252,13 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, - ? 
emitIntrinsicErrorMessage( - Op, "argument out of range or not a multiple of 4", DAG) - : SDValue(); -+ case Intrinsic::loongarch_lasx_xvstelm_d: -+ return (!isShiftedInt<8, 3>( -+ cast(Op.getOperand(4))->getSExtValue()) || -+ !isUInt<2>(cast(Op.getOperand(5))->getZExtValue())) -+ ? emitIntrinsicErrorMessage( -+ Op, "argument out of range or not a multiple of 8", DAG) -+ : SDValue(); - case Intrinsic::loongarch_lsx_vstelm_d: - return (!isShiftedInt<8, 3>( - cast(Op.getOperand(4))->getSExtValue()) || -@@ -1304,6 +1477,7 @@ replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl &Results, - LoongArchISD::VPICK_SEXT_ELT); - break; - case Intrinsic::loongarch_lsx_vpickve2gr_h: -+ case Intrinsic::loongarch_lasx_xvpickve2gr_w: - replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget, - LoongArchISD::VPICK_SEXT_ELT); - break; -@@ -1316,6 +1490,7 @@ replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl &Results, - LoongArchISD::VPICK_ZEXT_ELT); - break; - case Intrinsic::loongarch_lsx_vpickve2gr_hu: -+ case Intrinsic::loongarch_lasx_xvpickve2gr_wu: - replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget, - LoongArchISD::VPICK_ZEXT_ELT); - break; -@@ -1327,10 +1502,15 @@ replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl &Results, - case Intrinsic::loongarch_lsx_bz_h: - case Intrinsic::loongarch_lsx_bz_w: - case Intrinsic::loongarch_lsx_bz_d: -+ case Intrinsic::loongarch_lasx_xbz_b: -+ case Intrinsic::loongarch_lasx_xbz_h: -+ case Intrinsic::loongarch_lasx_xbz_w: -+ case Intrinsic::loongarch_lasx_xbz_d: - replaceVecCondBranchResults(N, Results, DAG, Subtarget, - LoongArchISD::VALL_ZERO); - break; - case Intrinsic::loongarch_lsx_bz_v: -+ case Intrinsic::loongarch_lasx_xbz_v: - replaceVecCondBranchResults(N, Results, DAG, Subtarget, - LoongArchISD::VANY_ZERO); - break; -@@ -1338,10 +1518,15 @@ replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl &Results, - case Intrinsic::loongarch_lsx_bnz_h: - case Intrinsic::loongarch_lsx_bnz_w: - case Intrinsic::loongarch_lsx_bnz_d: -+ case Intrinsic::loongarch_lasx_xbnz_b: -+ case Intrinsic::loongarch_lasx_xbnz_h: -+ case Intrinsic::loongarch_lasx_xbnz_w: -+ case Intrinsic::loongarch_lasx_xbnz_d: - replaceVecCondBranchResults(N, Results, DAG, Subtarget, - LoongArchISD::VALL_NONZERO); - break; - case Intrinsic::loongarch_lsx_bnz_v: -+ case Intrinsic::loongarch_lasx_xbnz_v: - replaceVecCondBranchResults(N, Results, DAG, Subtarget, - LoongArchISD::VANY_NONZERO); - break; -@@ -2114,30 +2299,50 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, - case Intrinsic::loongarch_lsx_vadd_h: - case Intrinsic::loongarch_lsx_vadd_w: - case Intrinsic::loongarch_lsx_vadd_d: -+ case Intrinsic::loongarch_lasx_xvadd_b: -+ case Intrinsic::loongarch_lasx_xvadd_h: -+ case Intrinsic::loongarch_lasx_xvadd_w: -+ case Intrinsic::loongarch_lasx_xvadd_d: - return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - case Intrinsic::loongarch_lsx_vaddi_bu: - case Intrinsic::loongarch_lsx_vaddi_hu: - case Intrinsic::loongarch_lsx_vaddi_wu: - case Intrinsic::loongarch_lsx_vaddi_du: -+ case Intrinsic::loongarch_lasx_xvaddi_bu: -+ case Intrinsic::loongarch_lasx_xvaddi_hu: -+ case Intrinsic::loongarch_lasx_xvaddi_wu: -+ case Intrinsic::loongarch_lasx_xvaddi_du: - return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<5>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vsub_b: - case Intrinsic::loongarch_lsx_vsub_h: - case Intrinsic::loongarch_lsx_vsub_w: - case 
Intrinsic::loongarch_lsx_vsub_d: -+ case Intrinsic::loongarch_lasx_xvsub_b: -+ case Intrinsic::loongarch_lasx_xvsub_h: -+ case Intrinsic::loongarch_lasx_xvsub_w: -+ case Intrinsic::loongarch_lasx_xvsub_d: - return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - case Intrinsic::loongarch_lsx_vsubi_bu: - case Intrinsic::loongarch_lsx_vsubi_hu: - case Intrinsic::loongarch_lsx_vsubi_wu: - case Intrinsic::loongarch_lsx_vsubi_du: -+ case Intrinsic::loongarch_lasx_xvsubi_bu: -+ case Intrinsic::loongarch_lasx_xvsubi_hu: -+ case Intrinsic::loongarch_lasx_xvsubi_wu: -+ case Intrinsic::loongarch_lasx_xvsubi_du: - return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<5>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vneg_b: - case Intrinsic::loongarch_lsx_vneg_h: - case Intrinsic::loongarch_lsx_vneg_w: - case Intrinsic::loongarch_lsx_vneg_d: -+ case Intrinsic::loongarch_lasx_xvneg_b: -+ case Intrinsic::loongarch_lasx_xvneg_h: -+ case Intrinsic::loongarch_lasx_xvneg_w: -+ case Intrinsic::loongarch_lasx_xvneg_d: - return DAG.getNode( - ISD::SUB, DL, N->getValueType(0), - DAG.getConstant( -@@ -2149,60 +2354,100 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, - case Intrinsic::loongarch_lsx_vmax_h: - case Intrinsic::loongarch_lsx_vmax_w: - case Intrinsic::loongarch_lsx_vmax_d: -+ case Intrinsic::loongarch_lasx_xvmax_b: -+ case Intrinsic::loongarch_lasx_xvmax_h: -+ case Intrinsic::loongarch_lasx_xvmax_w: -+ case Intrinsic::loongarch_lasx_xvmax_d: - return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - case Intrinsic::loongarch_lsx_vmax_bu: - case Intrinsic::loongarch_lsx_vmax_hu: - case Intrinsic::loongarch_lsx_vmax_wu: - case Intrinsic::loongarch_lsx_vmax_du: -+ case Intrinsic::loongarch_lasx_xvmax_bu: -+ case Intrinsic::loongarch_lasx_xvmax_hu: -+ case Intrinsic::loongarch_lasx_xvmax_wu: -+ case Intrinsic::loongarch_lasx_xvmax_du: - return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - case Intrinsic::loongarch_lsx_vmaxi_b: - case Intrinsic::loongarch_lsx_vmaxi_h: - case Intrinsic::loongarch_lsx_vmaxi_w: - case Intrinsic::loongarch_lsx_vmaxi_d: -+ case Intrinsic::loongarch_lasx_xvmaxi_b: -+ case Intrinsic::loongarch_lasx_xvmaxi_h: -+ case Intrinsic::loongarch_lasx_xvmaxi_w: -+ case Intrinsic::loongarch_lasx_xvmaxi_d: - return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true)); - case Intrinsic::loongarch_lsx_vmaxi_bu: - case Intrinsic::loongarch_lsx_vmaxi_hu: - case Intrinsic::loongarch_lsx_vmaxi_wu: - case Intrinsic::loongarch_lsx_vmaxi_du: -+ case Intrinsic::loongarch_lasx_xvmaxi_bu: -+ case Intrinsic::loongarch_lasx_xvmaxi_hu: -+ case Intrinsic::loongarch_lasx_xvmaxi_wu: -+ case Intrinsic::loongarch_lasx_xvmaxi_du: - return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<5>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vmin_b: - case Intrinsic::loongarch_lsx_vmin_h: - case Intrinsic::loongarch_lsx_vmin_w: - case Intrinsic::loongarch_lsx_vmin_d: -+ case Intrinsic::loongarch_lasx_xvmin_b: -+ case Intrinsic::loongarch_lasx_xvmin_h: -+ case Intrinsic::loongarch_lasx_xvmin_w: -+ case Intrinsic::loongarch_lasx_xvmin_d: - return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - case Intrinsic::loongarch_lsx_vmin_bu: - case Intrinsic::loongarch_lsx_vmin_hu: - case 
Intrinsic::loongarch_lsx_vmin_wu: - case Intrinsic::loongarch_lsx_vmin_du: -+ case Intrinsic::loongarch_lasx_xvmin_bu: -+ case Intrinsic::loongarch_lasx_xvmin_hu: -+ case Intrinsic::loongarch_lasx_xvmin_wu: -+ case Intrinsic::loongarch_lasx_xvmin_du: - return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - case Intrinsic::loongarch_lsx_vmini_b: - case Intrinsic::loongarch_lsx_vmini_h: - case Intrinsic::loongarch_lsx_vmini_w: - case Intrinsic::loongarch_lsx_vmini_d: -+ case Intrinsic::loongarch_lasx_xvmini_b: -+ case Intrinsic::loongarch_lasx_xvmini_h: -+ case Intrinsic::loongarch_lasx_xvmini_w: -+ case Intrinsic::loongarch_lasx_xvmini_d: - return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true)); - case Intrinsic::loongarch_lsx_vmini_bu: - case Intrinsic::loongarch_lsx_vmini_hu: - case Intrinsic::loongarch_lsx_vmini_wu: - case Intrinsic::loongarch_lsx_vmini_du: -+ case Intrinsic::loongarch_lasx_xvmini_bu: -+ case Intrinsic::loongarch_lasx_xvmini_hu: -+ case Intrinsic::loongarch_lasx_xvmini_wu: -+ case Intrinsic::loongarch_lasx_xvmini_du: - return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<5>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vmul_b: - case Intrinsic::loongarch_lsx_vmul_h: - case Intrinsic::loongarch_lsx_vmul_w: - case Intrinsic::loongarch_lsx_vmul_d: -+ case Intrinsic::loongarch_lasx_xvmul_b: -+ case Intrinsic::loongarch_lasx_xvmul_h: -+ case Intrinsic::loongarch_lasx_xvmul_w: -+ case Intrinsic::loongarch_lasx_xvmul_d: - return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - case Intrinsic::loongarch_lsx_vmadd_b: - case Intrinsic::loongarch_lsx_vmadd_h: - case Intrinsic::loongarch_lsx_vmadd_w: -- case Intrinsic::loongarch_lsx_vmadd_d: { -+ case Intrinsic::loongarch_lsx_vmadd_d: -+ case Intrinsic::loongarch_lasx_xvmadd_b: -+ case Intrinsic::loongarch_lasx_xvmadd_h: -+ case Intrinsic::loongarch_lasx_xvmadd_w: -+ case Intrinsic::loongarch_lasx_xvmadd_d: { - EVT ResTy = N->getValueType(0); - return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1), - DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2), -@@ -2211,7 +2456,11 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, - case Intrinsic::loongarch_lsx_vmsub_b: - case Intrinsic::loongarch_lsx_vmsub_h: - case Intrinsic::loongarch_lsx_vmsub_w: -- case Intrinsic::loongarch_lsx_vmsub_d: { -+ case Intrinsic::loongarch_lsx_vmsub_d: -+ case Intrinsic::loongarch_lasx_xvmsub_b: -+ case Intrinsic::loongarch_lasx_xvmsub_h: -+ case Intrinsic::loongarch_lasx_xvmsub_w: -+ case Intrinsic::loongarch_lasx_xvmsub_d: { - EVT ResTy = N->getValueType(0); - return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1), - DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2), -@@ -2221,125 +2470,188 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, - case Intrinsic::loongarch_lsx_vdiv_h: - case Intrinsic::loongarch_lsx_vdiv_w: - case Intrinsic::loongarch_lsx_vdiv_d: -+ case Intrinsic::loongarch_lasx_xvdiv_b: -+ case Intrinsic::loongarch_lasx_xvdiv_h: -+ case Intrinsic::loongarch_lasx_xvdiv_w: -+ case Intrinsic::loongarch_lasx_xvdiv_d: - return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - case Intrinsic::loongarch_lsx_vdiv_bu: - case Intrinsic::loongarch_lsx_vdiv_hu: - case Intrinsic::loongarch_lsx_vdiv_wu: - case Intrinsic::loongarch_lsx_vdiv_du: -+ case 
Intrinsic::loongarch_lasx_xvdiv_bu: -+ case Intrinsic::loongarch_lasx_xvdiv_hu: -+ case Intrinsic::loongarch_lasx_xvdiv_wu: -+ case Intrinsic::loongarch_lasx_xvdiv_du: - return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - case Intrinsic::loongarch_lsx_vmod_b: - case Intrinsic::loongarch_lsx_vmod_h: - case Intrinsic::loongarch_lsx_vmod_w: - case Intrinsic::loongarch_lsx_vmod_d: -+ case Intrinsic::loongarch_lasx_xvmod_b: -+ case Intrinsic::loongarch_lasx_xvmod_h: -+ case Intrinsic::loongarch_lasx_xvmod_w: -+ case Intrinsic::loongarch_lasx_xvmod_d: - return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - case Intrinsic::loongarch_lsx_vmod_bu: - case Intrinsic::loongarch_lsx_vmod_hu: - case Intrinsic::loongarch_lsx_vmod_wu: - case Intrinsic::loongarch_lsx_vmod_du: -+ case Intrinsic::loongarch_lasx_xvmod_bu: -+ case Intrinsic::loongarch_lasx_xvmod_hu: -+ case Intrinsic::loongarch_lasx_xvmod_wu: -+ case Intrinsic::loongarch_lasx_xvmod_du: - return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - case Intrinsic::loongarch_lsx_vand_v: -+ case Intrinsic::loongarch_lasx_xvand_v: - return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - case Intrinsic::loongarch_lsx_vor_v: -+ case Intrinsic::loongarch_lasx_xvor_v: - return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - case Intrinsic::loongarch_lsx_vxor_v: -+ case Intrinsic::loongarch_lasx_xvxor_v: - return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); -- case Intrinsic::loongarch_lsx_vnor_v: { -+ case Intrinsic::loongarch_lsx_vnor_v: -+ case Intrinsic::loongarch_lasx_xvnor_v: { - SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - return DAG.getNOT(DL, Res, Res->getValueType(0)); - } - case Intrinsic::loongarch_lsx_vandi_b: -+ case Intrinsic::loongarch_lasx_xvandi_b: - return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<8>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vori_b: -+ case Intrinsic::loongarch_lasx_xvori_b: - return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<8>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vxori_b: -+ case Intrinsic::loongarch_lasx_xvxori_b: - return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<8>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vsll_b: - case Intrinsic::loongarch_lsx_vsll_h: - case Intrinsic::loongarch_lsx_vsll_w: - case Intrinsic::loongarch_lsx_vsll_d: -+ case Intrinsic::loongarch_lasx_xvsll_b: -+ case Intrinsic::loongarch_lasx_xvsll_h: -+ case Intrinsic::loongarch_lasx_xvsll_w: -+ case Intrinsic::loongarch_lasx_xvsll_d: - return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), - truncateVecElts(N, DAG)); - case Intrinsic::loongarch_lsx_vslli_b: -+ case Intrinsic::loongarch_lasx_xvslli_b: - return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<3>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vslli_h: -+ case Intrinsic::loongarch_lasx_xvslli_h: - return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<4>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vslli_w: -+ case Intrinsic::loongarch_lasx_xvslli_w: - return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<5>(N, 2, DAG)); - case 
Intrinsic::loongarch_lsx_vslli_d: -+ case Intrinsic::loongarch_lasx_xvslli_d: - return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<6>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vsrl_b: - case Intrinsic::loongarch_lsx_vsrl_h: - case Intrinsic::loongarch_lsx_vsrl_w: - case Intrinsic::loongarch_lsx_vsrl_d: -+ case Intrinsic::loongarch_lasx_xvsrl_b: -+ case Intrinsic::loongarch_lasx_xvsrl_h: -+ case Intrinsic::loongarch_lasx_xvsrl_w: -+ case Intrinsic::loongarch_lasx_xvsrl_d: - return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), - truncateVecElts(N, DAG)); - case Intrinsic::loongarch_lsx_vsrli_b: -+ case Intrinsic::loongarch_lasx_xvsrli_b: - return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<3>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vsrli_h: -+ case Intrinsic::loongarch_lasx_xvsrli_h: - return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<4>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vsrli_w: -+ case Intrinsic::loongarch_lasx_xvsrli_w: - return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<5>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vsrli_d: -+ case Intrinsic::loongarch_lasx_xvsrli_d: - return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<6>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vsra_b: - case Intrinsic::loongarch_lsx_vsra_h: - case Intrinsic::loongarch_lsx_vsra_w: - case Intrinsic::loongarch_lsx_vsra_d: -+ case Intrinsic::loongarch_lasx_xvsra_b: -+ case Intrinsic::loongarch_lasx_xvsra_h: -+ case Intrinsic::loongarch_lasx_xvsra_w: -+ case Intrinsic::loongarch_lasx_xvsra_d: - return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), - truncateVecElts(N, DAG)); - case Intrinsic::loongarch_lsx_vsrai_b: -+ case Intrinsic::loongarch_lasx_xvsrai_b: - return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<3>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vsrai_h: -+ case Intrinsic::loongarch_lasx_xvsrai_h: - return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<4>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vsrai_w: -+ case Intrinsic::loongarch_lasx_xvsrai_w: - return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<5>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vsrai_d: -+ case Intrinsic::loongarch_lasx_xvsrai_d: - return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<6>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vpcnt_b: - case Intrinsic::loongarch_lsx_vpcnt_h: - case Intrinsic::loongarch_lsx_vpcnt_w: - case Intrinsic::loongarch_lsx_vpcnt_d: -+ case Intrinsic::loongarch_lasx_xvpcnt_b: -+ case Intrinsic::loongarch_lasx_xvpcnt_h: -+ case Intrinsic::loongarch_lasx_xvpcnt_w: -+ case Intrinsic::loongarch_lasx_xvpcnt_d: - return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1)); - case Intrinsic::loongarch_lsx_vbitclr_b: - case Intrinsic::loongarch_lsx_vbitclr_h: - case Intrinsic::loongarch_lsx_vbitclr_w: - case Intrinsic::loongarch_lsx_vbitclr_d: -+ case Intrinsic::loongarch_lasx_xvbitclr_b: -+ case Intrinsic::loongarch_lasx_xvbitclr_h: -+ case Intrinsic::loongarch_lasx_xvbitclr_w: -+ case Intrinsic::loongarch_lasx_xvbitclr_d: - return lowerVectorBitClear(N, DAG); - case Intrinsic::loongarch_lsx_vbitclri_b: -+ case Intrinsic::loongarch_lasx_xvbitclri_b: - return 
lowerVectorBitClearImm<3>(N, DAG); - case Intrinsic::loongarch_lsx_vbitclri_h: -+ case Intrinsic::loongarch_lasx_xvbitclri_h: - return lowerVectorBitClearImm<4>(N, DAG); - case Intrinsic::loongarch_lsx_vbitclri_w: -+ case Intrinsic::loongarch_lasx_xvbitclri_w: - return lowerVectorBitClearImm<5>(N, DAG); - case Intrinsic::loongarch_lsx_vbitclri_d: -+ case Intrinsic::loongarch_lasx_xvbitclri_d: - return lowerVectorBitClearImm<6>(N, DAG); - case Intrinsic::loongarch_lsx_vbitset_b: - case Intrinsic::loongarch_lsx_vbitset_h: - case Intrinsic::loongarch_lsx_vbitset_w: -- case Intrinsic::loongarch_lsx_vbitset_d: { -+ case Intrinsic::loongarch_lsx_vbitset_d: -+ case Intrinsic::loongarch_lasx_xvbitset_b: -+ case Intrinsic::loongarch_lasx_xvbitset_h: -+ case Intrinsic::loongarch_lasx_xvbitset_w: -+ case Intrinsic::loongarch_lasx_xvbitset_d: { - EVT VecTy = N->getValueType(0); - SDValue One = DAG.getConstant(1, DL, VecTy); - return DAG.getNode( -@@ -2347,17 +2659,25 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, - DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG))); - } - case Intrinsic::loongarch_lsx_vbitseti_b: -+ case Intrinsic::loongarch_lasx_xvbitseti_b: - return lowerVectorBitSetImm<3>(N, DAG); - case Intrinsic::loongarch_lsx_vbitseti_h: -+ case Intrinsic::loongarch_lasx_xvbitseti_h: - return lowerVectorBitSetImm<4>(N, DAG); - case Intrinsic::loongarch_lsx_vbitseti_w: -+ case Intrinsic::loongarch_lasx_xvbitseti_w: - return lowerVectorBitSetImm<5>(N, DAG); - case Intrinsic::loongarch_lsx_vbitseti_d: -+ case Intrinsic::loongarch_lasx_xvbitseti_d: - return lowerVectorBitSetImm<6>(N, DAG); - case Intrinsic::loongarch_lsx_vbitrev_b: - case Intrinsic::loongarch_lsx_vbitrev_h: - case Intrinsic::loongarch_lsx_vbitrev_w: -- case Intrinsic::loongarch_lsx_vbitrev_d: { -+ case Intrinsic::loongarch_lsx_vbitrev_d: -+ case Intrinsic::loongarch_lasx_xvbitrev_b: -+ case Intrinsic::loongarch_lasx_xvbitrev_h: -+ case Intrinsic::loongarch_lasx_xvbitrev_w: -+ case Intrinsic::loongarch_lasx_xvbitrev_d: { - EVT VecTy = N->getValueType(0); - SDValue One = DAG.getConstant(1, DL, VecTy); - return DAG.getNode( -@@ -2365,31 +2685,45 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, - DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG))); - } - case Intrinsic::loongarch_lsx_vbitrevi_b: -+ case Intrinsic::loongarch_lasx_xvbitrevi_b: - return lowerVectorBitRevImm<3>(N, DAG); - case Intrinsic::loongarch_lsx_vbitrevi_h: -+ case Intrinsic::loongarch_lasx_xvbitrevi_h: - return lowerVectorBitRevImm<4>(N, DAG); - case Intrinsic::loongarch_lsx_vbitrevi_w: -+ case Intrinsic::loongarch_lasx_xvbitrevi_w: - return lowerVectorBitRevImm<5>(N, DAG); - case Intrinsic::loongarch_lsx_vbitrevi_d: -+ case Intrinsic::loongarch_lasx_xvbitrevi_d: - return lowerVectorBitRevImm<6>(N, DAG); - case Intrinsic::loongarch_lsx_vfadd_s: - case Intrinsic::loongarch_lsx_vfadd_d: -+ case Intrinsic::loongarch_lasx_xvfadd_s: -+ case Intrinsic::loongarch_lasx_xvfadd_d: - return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - case Intrinsic::loongarch_lsx_vfsub_s: - case Intrinsic::loongarch_lsx_vfsub_d: -+ case Intrinsic::loongarch_lasx_xvfsub_s: -+ case Intrinsic::loongarch_lasx_xvfsub_d: - return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - case Intrinsic::loongarch_lsx_vfmul_s: - case Intrinsic::loongarch_lsx_vfmul_d: -+ case Intrinsic::loongarch_lasx_xvfmul_s: -+ case Intrinsic::loongarch_lasx_xvfmul_d: - return 
DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - case Intrinsic::loongarch_lsx_vfdiv_s: - case Intrinsic::loongarch_lsx_vfdiv_d: -+ case Intrinsic::loongarch_lasx_xvfdiv_s: -+ case Intrinsic::loongarch_lasx_xvfdiv_d: - return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - case Intrinsic::loongarch_lsx_vfmadd_s: - case Intrinsic::loongarch_lsx_vfmadd_d: -+ case Intrinsic::loongarch_lasx_xvfmadd_s: -+ case Intrinsic::loongarch_lasx_xvfmadd_d: - return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2), N->getOperand(3)); - case Intrinsic::loongarch_lsx_vinsgr2vr_b: -@@ -2397,10 +2731,12 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, - N->getOperand(1), N->getOperand(2), - legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget)); - case Intrinsic::loongarch_lsx_vinsgr2vr_h: -+ case Intrinsic::loongarch_lasx_xvinsgr2vr_w: - return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), - N->getOperand(1), N->getOperand(2), - legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget)); - case Intrinsic::loongarch_lsx_vinsgr2vr_w: -+ case Intrinsic::loongarch_lasx_xvinsgr2vr_d: - return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), - N->getOperand(1), N->getOperand(2), - legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget)); -@@ -2411,7 +2747,11 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, - case Intrinsic::loongarch_lsx_vreplgr2vr_b: - case Intrinsic::loongarch_lsx_vreplgr2vr_h: - case Intrinsic::loongarch_lsx_vreplgr2vr_w: -- case Intrinsic::loongarch_lsx_vreplgr2vr_d: { -+ case Intrinsic::loongarch_lsx_vreplgr2vr_d: -+ case Intrinsic::loongarch_lasx_xvreplgr2vr_b: -+ case Intrinsic::loongarch_lasx_xvreplgr2vr_h: -+ case Intrinsic::loongarch_lasx_xvreplgr2vr_w: -+ case Intrinsic::loongarch_lasx_xvreplgr2vr_d: { - EVT ResTy = N->getValueType(0); - SmallVector Ops(ResTy.getVectorNumElements(), N->getOperand(1)); - return DAG.getBuildVector(ResTy, DL, Ops); -@@ -2420,6 +2760,10 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, - case Intrinsic::loongarch_lsx_vreplve_h: - case Intrinsic::loongarch_lsx_vreplve_w: - case Intrinsic::loongarch_lsx_vreplve_d: -+ case Intrinsic::loongarch_lasx_xvreplve_b: -+ case Intrinsic::loongarch_lasx_xvreplve_h: -+ case Intrinsic::loongarch_lasx_xvreplve_w: -+ case Intrinsic::loongarch_lasx_xvreplve_d: - return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0), - N->getOperand(1), - DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(), -@@ -2534,6 +2878,36 @@ emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, - case LoongArch::PseudoVBNZ_D: - CondOpc = LoongArch::VSETALLNEZ_D; - break; -+ case LoongArch::PseudoXVBZ: -+ CondOpc = LoongArch::XVSETEQZ_V; -+ break; -+ case LoongArch::PseudoXVBZ_B: -+ CondOpc = LoongArch::XVSETANYEQZ_B; -+ break; -+ case LoongArch::PseudoXVBZ_H: -+ CondOpc = LoongArch::XVSETANYEQZ_H; -+ break; -+ case LoongArch::PseudoXVBZ_W: -+ CondOpc = LoongArch::XVSETANYEQZ_W; -+ break; -+ case LoongArch::PseudoXVBZ_D: -+ CondOpc = LoongArch::XVSETANYEQZ_D; -+ break; -+ case LoongArch::PseudoXVBNZ: -+ CondOpc = LoongArch::XVSETNEZ_V; -+ break; -+ case LoongArch::PseudoXVBNZ_B: -+ CondOpc = LoongArch::XVSETALLNEZ_B; -+ break; -+ case LoongArch::PseudoXVBNZ_H: -+ CondOpc = LoongArch::XVSETALLNEZ_H; -+ break; -+ case LoongArch::PseudoXVBNZ_W: -+ CondOpc = LoongArch::XVSETALLNEZ_W; -+ break; -+ case LoongArch::PseudoXVBNZ_D: -+ CondOpc = 
LoongArch::XVSETALLNEZ_D; -+ break; - } - - const TargetInstrInfo *TII = Subtarget.getInstrInfo(); -@@ -2636,6 +3010,16 @@ MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( - case LoongArch::PseudoVBNZ_H: - case LoongArch::PseudoVBNZ_W: - case LoongArch::PseudoVBNZ_D: -+ case LoongArch::PseudoXVBZ: -+ case LoongArch::PseudoXVBZ_B: -+ case LoongArch::PseudoXVBZ_H: -+ case LoongArch::PseudoXVBZ_W: -+ case LoongArch::PseudoXVBZ_D: -+ case LoongArch::PseudoXVBNZ: -+ case LoongArch::PseudoXVBNZ_B: -+ case LoongArch::PseudoXVBNZ_H: -+ case LoongArch::PseudoXVBNZ_W: -+ case LoongArch::PseudoXVBNZ_D: - return emitVecCondBranchPseudo(MI, BB, Subtarget); - } - } -@@ -2746,6 +3130,10 @@ const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2, - LoongArch::VR3, LoongArch::VR4, LoongArch::VR5, - LoongArch::VR6, LoongArch::VR7}; - -+const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2, -+ LoongArch::XR3, LoongArch::XR4, LoongArch::XR5, -+ LoongArch::XR6, LoongArch::XR7}; -+ - // Pass a 2*GRLen argument that has been split into two GRLen values through - // registers or the stack as necessary. - static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, -@@ -2894,6 +3282,8 @@ static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, - Reg = State.AllocateReg(ArgFPR64s); - else if (ValVT.is128BitVector()) - Reg = State.AllocateReg(ArgVRs); -+ else if (ValVT.is256BitVector()) -+ Reg = State.AllocateReg(ArgXRs); - else - Reg = State.AllocateReg(ArgGPRs); - -diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp -index a5d66ebac96a..ddd1c9943fac 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp -@@ -55,6 +55,14 @@ void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB, - return; - } - -+ // XR->XR copies. -+ if (LoongArch::LASX256RegClass.contains(DstReg, SrcReg)) { -+ BuildMI(MBB, MBBI, DL, get(LoongArch::XVORI_B), DstReg) -+ .addReg(SrcReg, getKillRegState(KillSrc)) -+ .addImm(0); -+ return; -+ } -+ - // GPR->CFR copy. 
- if (LoongArch::CFRRegClass.contains(DstReg) && - LoongArch::GPRRegClass.contains(SrcReg)) { -@@ -109,6 +117,8 @@ void LoongArchInstrInfo::storeRegToStackSlot( - Opcode = LoongArch::FST_D; - else if (LoongArch::LSX128RegClass.hasSubClassEq(RC)) - Opcode = LoongArch::VST; -+ else if (LoongArch::LASX256RegClass.hasSubClassEq(RC)) -+ Opcode = LoongArch::XVST; - else if (LoongArch::CFRRegClass.hasSubClassEq(RC)) - Opcode = LoongArch::PseudoST_CFR; - else -@@ -145,6 +155,8 @@ void LoongArchInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, - Opcode = LoongArch::FLD_D; - else if (LoongArch::LSX128RegClass.hasSubClassEq(RC)) - Opcode = LoongArch::VLD; -+ else if (LoongArch::LASX256RegClass.hasSubClassEq(RC)) -+ Opcode = LoongArch::XVLD; - else if (LoongArch::CFRRegClass.hasSubClassEq(RC)) - Opcode = LoongArch::PseudoLD_CFR; - else -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index dc37b37b2186..a3afd4789dfc 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -10,6 +10,30 @@ - // - //===----------------------------------------------------------------------===// - -+def lasxsplati8 -+ : PatFrag<(ops node:$e0), -+ (v32i8 (build_vector node:$e0, node:$e0, node:$e0, node:$e0, -+ node:$e0, node:$e0, node:$e0, node:$e0, -+ node:$e0, node:$e0, node:$e0, node:$e0, -+ node:$e0, node:$e0, node:$e0, node:$e0, -+ node:$e0, node:$e0, node:$e0, node:$e0, -+ node:$e0, node:$e0, node:$e0, node:$e0, -+ node:$e0, node:$e0, node:$e0, node:$e0, -+ node:$e0, node:$e0, node:$e0, node:$e0))>; -+def lasxsplati16 -+ : PatFrag<(ops node:$e0), -+ (v16i16 (build_vector node:$e0, node:$e0, node:$e0, node:$e0, -+ node:$e0, node:$e0, node:$e0, node:$e0, -+ node:$e0, node:$e0, node:$e0, node:$e0, -+ node:$e0, node:$e0, node:$e0, node:$e0))>; -+def lasxsplati32 -+ : PatFrag<(ops node:$e0), -+ (v8i32 (build_vector node:$e0, node:$e0, node:$e0, node:$e0, -+ node:$e0, node:$e0, node:$e0, node:$e0))>; -+def lasxsplati64 -+ : PatFrag<(ops node:$e0), -+ (v4i64 (build_vector node:$e0, node:$e0, node:$e0, node:$e0))>; -+ - //===----------------------------------------------------------------------===// - // Instruction class templates - //===----------------------------------------------------------------------===// -@@ -1029,4 +1053,682 @@ def PseudoXVREPLI_D : Pseudo<(outs LASX256:$xd), (ins simm10:$imm), [], - "xvrepli.d", "$xd, $imm">; - } - -+def PseudoXVBNZ_B : VecCond; -+def PseudoXVBNZ_H : VecCond; -+def PseudoXVBNZ_W : VecCond; -+def PseudoXVBNZ_D : VecCond; -+def PseudoXVBNZ : VecCond; -+ -+def PseudoXVBZ_B : VecCond; -+def PseudoXVBZ_H : VecCond; -+def PseudoXVBZ_W : VecCond; -+def PseudoXVBZ_D : VecCond; -+def PseudoXVBZ : VecCond; -+ -+} // Predicates = [HasExtLASX] -+ -+multiclass PatXr { -+ def : Pat<(v32i8 (OpNode (v32i8 LASX256:$xj))), -+ (!cast(Inst#"_B") LASX256:$xj)>; -+ def : Pat<(v16i16 (OpNode (v16i16 LASX256:$xj))), -+ (!cast(Inst#"_H") LASX256:$xj)>; -+ def : Pat<(v8i32 (OpNode (v8i32 LASX256:$xj))), -+ (!cast(Inst#"_W") LASX256:$xj)>; -+ def : Pat<(v4i64 (OpNode (v4i64 LASX256:$xj))), -+ (!cast(Inst#"_D") LASX256:$xj)>; -+} -+ -+multiclass PatXrXr { -+ def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), -+ (!cast(Inst#"_B") LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)), -+ (!cast(Inst#"_H") LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)), -+ (!cast(Inst#"_W") 
LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)), -+ (!cast(Inst#"_D") LASX256:$xj, LASX256:$xk)>; -+} -+ -+multiclass PatXrXrF { -+ def : Pat<(OpNode (v8f32 LASX256:$xj), (v8f32 LASX256:$xk)), -+ (!cast(Inst#"_S") LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(OpNode (v4f64 LASX256:$xj), (v4f64 LASX256:$xk)), -+ (!cast(Inst#"_D") LASX256:$xj, LASX256:$xk)>; -+} -+ -+multiclass PatXrXrU { -+ def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), -+ (!cast(Inst#"_BU") LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)), -+ (!cast(Inst#"_HU") LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)), -+ (!cast(Inst#"_WU") LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)), -+ (!cast(Inst#"_DU") LASX256:$xj, LASX256:$xk)>; -+} -+ -+multiclass PatXrSimm5 { -+ def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 (SplatPat_simm5 simm5:$imm))), -+ (!cast(Inst#"_B") LASX256:$xj, simm5:$imm)>; -+ def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 (SplatPat_simm5 simm5:$imm))), -+ (!cast(Inst#"_H") LASX256:$xj, simm5:$imm)>; -+ def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 (SplatPat_simm5 simm5:$imm))), -+ (!cast(Inst#"_W") LASX256:$xj, simm5:$imm)>; -+ def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 (SplatPat_simm5 simm5:$imm))), -+ (!cast(Inst#"_D") LASX256:$xj, simm5:$imm)>; -+} -+ -+multiclass PatXrUimm5 { -+ def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm5 uimm5:$imm))), -+ (!cast(Inst#"_BU") LASX256:$xj, uimm5:$imm)>; -+ def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 (SplatPat_uimm5 uimm5:$imm))), -+ (!cast(Inst#"_HU") LASX256:$xj, uimm5:$imm)>; -+ def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 (SplatPat_uimm5 uimm5:$imm))), -+ (!cast(Inst#"_WU") LASX256:$xj, uimm5:$imm)>; -+ def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 (SplatPat_uimm5 uimm5:$imm))), -+ (!cast(Inst#"_DU") LASX256:$xj, uimm5:$imm)>; -+} -+ -+multiclass PatXrXrXr { -+ def : Pat<(OpNode (v32i8 LASX256:$xd), (v32i8 LASX256:$xj), -+ (v32i8 LASX256:$xk)), -+ (!cast(Inst#"_B") LASX256:$xd, LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(OpNode (v16i16 LASX256:$xd), (v16i16 LASX256:$xj), -+ (v16i16 LASX256:$xk)), -+ (!cast(Inst#"_H") LASX256:$xd, LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(OpNode (v8i32 LASX256:$xd), (v8i32 LASX256:$xj), -+ (v8i32 LASX256:$xk)), -+ (!cast(Inst#"_W") LASX256:$xd, LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(OpNode (v4i64 LASX256:$xd), (v4i64 LASX256:$xj), -+ (v4i64 LASX256:$xk)), -+ (!cast(Inst#"_D") LASX256:$xd, LASX256:$xj, LASX256:$xk)>; -+} -+ -+multiclass PatShiftXrXr { -+ def : Pat<(OpNode (v32i8 LASX256:$xj), (and vsplati8_imm_eq_7, -+ (v32i8 LASX256:$xk))), -+ (!cast(Inst#"_B") LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(OpNode (v16i16 LASX256:$xj), (and vsplati16_imm_eq_15, -+ (v16i16 LASX256:$xk))), -+ (!cast(Inst#"_H") LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(OpNode (v8i32 LASX256:$xj), (and vsplati32_imm_eq_31, -+ (v8i32 LASX256:$xk))), -+ (!cast(Inst#"_W") LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(OpNode (v4i64 LASX256:$xj), (and vsplati64_imm_eq_63, -+ (v4i64 LASX256:$xk))), -+ (!cast(Inst#"_D") LASX256:$xj, LASX256:$xk)>; -+} -+ -+multiclass PatShiftXrUimm { -+ def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm3 uimm3:$imm))), -+ (!cast(Inst#"_B") LASX256:$xj, uimm3:$imm)>; -+ def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 (SplatPat_uimm4 uimm4:$imm))), -+ (!cast(Inst#"_H") LASX256:$xj, uimm4:$imm)>; -+ def : Pat<(OpNode (v8i32 
LASX256:$xj), (v8i32 (SplatPat_uimm5 uimm5:$imm))), -+ (!cast(Inst#"_W") LASX256:$xj, uimm5:$imm)>; -+ def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 (SplatPat_uimm6 uimm6:$imm))), -+ (!cast(Inst#"_D") LASX256:$xj, uimm6:$imm)>; -+} -+ -+class PatXrXrB -+ : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), -+ (Inst LASX256:$xj, LASX256:$xk)>; -+ -+let Predicates = [HasExtLASX] in { -+ -+// XVADD_{B/H/W/D} -+defm : PatXrXr; -+// XVSUB_{B/H/W/D} -+defm : PatXrXr; -+ -+// XVADDI_{B/H/W/D}U -+defm : PatXrUimm5; -+// XVSUBI_{B/H/W/D}U -+defm : PatXrUimm5; -+ -+// XVNEG_{B/H/W/D} -+def : Pat<(sub immAllZerosV, (v32i8 LASX256:$xj)), (XVNEG_B LASX256:$xj)>; -+def : Pat<(sub immAllZerosV, (v16i16 LASX256:$xj)), (XVNEG_H LASX256:$xj)>; -+def : Pat<(sub immAllZerosV, (v8i32 LASX256:$xj)), (XVNEG_W LASX256:$xj)>; -+def : Pat<(sub immAllZerosV, (v4i64 LASX256:$xj)), (XVNEG_D LASX256:$xj)>; -+ -+// XVMAX[I]_{B/H/W/D}[U] -+defm : PatXrXr; -+defm : PatXrXrU; -+defm : PatXrSimm5; -+defm : PatXrUimm5; -+ -+// XVMIN[I]_{B/H/W/D}[U] -+defm : PatXrXr; -+defm : PatXrXrU; -+defm : PatXrSimm5; -+defm : PatXrUimm5; -+ -+// XVMUL_{B/H/W/D} -+defm : PatXrXr; -+ -+// XVMADD_{B/H/W/D} -+defm : PatXrXrXr; -+// XVMSUB_{B/H/W/D} -+defm : PatXrXrXr; -+ -+// XVDIV_{B/H/W/D}[U] -+defm : PatXrXr; -+defm : PatXrXrU; -+ -+// XVMOD_{B/H/W/D}[U] -+defm : PatXrXr; -+defm : PatXrXrU; -+ -+// XVAND_V -+def : PatXrXrB; -+// XVNOR_V -+def : PatXrXrB; -+// XVXOR_V -+def : PatXrXrB; -+// XVNOR_V -+def : Pat<(vnot (or (v32i8 LASX256:$xj), (v32i8 LASX256:$xk))), -+ (XVNOR_V LASX256:$xj, LASX256:$xk)>; -+ -+// XVANDI_B -+def : Pat<(and (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm8 uimm8:$imm))), -+ (XVANDI_B LASX256:$xj, uimm8:$imm)>; -+// XVORI_B -+def : Pat<(or (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm8 uimm8:$imm))), -+ (XVORI_B LASX256:$xj, uimm8:$imm)>; -+ -+// XVXORI_B -+def : Pat<(xor (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm8 uimm8:$imm))), -+ (XVXORI_B LASX256:$xj, uimm8:$imm)>; -+ -+// XVSLL[I]_{B/H/W/D} -+defm : PatXrXr; -+defm : PatShiftXrXr; -+defm : PatShiftXrUimm; -+ -+// XVSRL[I]_{B/H/W/D} -+defm : PatXrXr; -+defm : PatShiftXrXr; -+defm : PatShiftXrUimm; -+ -+// XVSRA[I]_{B/H/W/D} -+defm : PatXrXr; -+defm : PatShiftXrXr; -+defm : PatShiftXrUimm; -+ -+// XVPCNT_{B/H/W/D} -+defm : PatXr; -+ -+// XVBITCLR_{B/H/W/D} -+def : Pat<(and v32i8:$xj, (vnot (shl vsplat_imm_eq_1, v32i8:$xk))), -+ (v32i8 (XVBITCLR_B v32i8:$xj, v32i8:$xk))>; -+def : Pat<(and v16i16:$xj, (vnot (shl vsplat_imm_eq_1, v16i16:$xk))), -+ (v16i16 (XVBITCLR_H v16i16:$xj, v16i16:$xk))>; -+def : Pat<(and v8i32:$xj, (vnot (shl vsplat_imm_eq_1, v8i32:$xk))), -+ (v8i32 (XVBITCLR_W v8i32:$xj, v8i32:$xk))>; -+def : Pat<(and v4i64:$xj, (vnot (shl vsplat_imm_eq_1, v4i64:$xk))), -+ (v4i64 (XVBITCLR_D v4i64:$xj, v4i64:$xk))>; -+def : Pat<(and v32i8:$xj, (vnot (shl vsplat_imm_eq_1, -+ (vsplati8imm7 v32i8:$xk)))), -+ (v32i8 (XVBITCLR_B v32i8:$xj, v32i8:$xk))>; -+def : Pat<(and v16i16:$xj, (vnot (shl vsplat_imm_eq_1, -+ (vsplati16imm15 v16i16:$xk)))), -+ (v16i16 (XVBITCLR_H v16i16:$xj, v16i16:$xk))>; -+def : Pat<(and v8i32:$xj, (vnot (shl vsplat_imm_eq_1, -+ (vsplati32imm31 v8i32:$xk)))), -+ (v8i32 (XVBITCLR_W v8i32:$xj, v8i32:$xk))>; -+def : Pat<(and v4i64:$xj, (vnot (shl vsplat_imm_eq_1, -+ (vsplati64imm63 v4i64:$xk)))), -+ (v4i64 (XVBITCLR_D v4i64:$xj, v4i64:$xk))>; -+ -+// XVBITCLRI_{B/H/W/D} -+def : Pat<(and (v32i8 LASX256:$xj), (v32i8 (vsplat_uimm_inv_pow2 uimm3:$imm))), -+ (XVBITCLRI_B LASX256:$xj, uimm3:$imm)>; -+def : Pat<(and (v16i16 LASX256:$xj), (v16i16 
(vsplat_uimm_inv_pow2 uimm4:$imm))), -+ (XVBITCLRI_H LASX256:$xj, uimm4:$imm)>; -+def : Pat<(and (v8i32 LASX256:$xj), (v8i32 (vsplat_uimm_inv_pow2 uimm5:$imm))), -+ (XVBITCLRI_W LASX256:$xj, uimm5:$imm)>; -+def : Pat<(and (v4i64 LASX256:$xj), (v4i64 (vsplat_uimm_inv_pow2 uimm6:$imm))), -+ (XVBITCLRI_D LASX256:$xj, uimm6:$imm)>; -+ -+// XVBITSET_{B/H/W/D} -+def : Pat<(or v32i8:$xj, (shl vsplat_imm_eq_1, v32i8:$xk)), -+ (v32i8 (XVBITSET_B v32i8:$xj, v32i8:$xk))>; -+def : Pat<(or v16i16:$xj, (shl vsplat_imm_eq_1, v16i16:$xk)), -+ (v16i16 (XVBITSET_H v16i16:$xj, v16i16:$xk))>; -+def : Pat<(or v8i32:$xj, (shl vsplat_imm_eq_1, v8i32:$xk)), -+ (v8i32 (XVBITSET_W v8i32:$xj, v8i32:$xk))>; -+def : Pat<(or v4i64:$xj, (shl vsplat_imm_eq_1, v4i64:$xk)), -+ (v4i64 (XVBITSET_D v4i64:$xj, v4i64:$xk))>; -+def : Pat<(or v32i8:$xj, (shl vsplat_imm_eq_1, (vsplati8imm7 v32i8:$xk))), -+ (v32i8 (XVBITSET_B v32i8:$xj, v32i8:$xk))>; -+def : Pat<(or v16i16:$xj, (shl vsplat_imm_eq_1, (vsplati16imm15 v16i16:$xk))), -+ (v16i16 (XVBITSET_H v16i16:$xj, v16i16:$xk))>; -+def : Pat<(or v8i32:$xj, (shl vsplat_imm_eq_1, (vsplati32imm31 v8i32:$xk))), -+ (v8i32 (XVBITSET_W v8i32:$xj, v8i32:$xk))>; -+def : Pat<(or v4i64:$xj, (shl vsplat_imm_eq_1, (vsplati64imm63 v4i64:$xk))), -+ (v4i64 (XVBITSET_D v4i64:$xj, v4i64:$xk))>; -+ -+// XVBITSETI_{B/H/W/D} -+def : Pat<(or (v32i8 LASX256:$xj), (v32i8 (vsplat_uimm_pow2 uimm3:$imm))), -+ (XVBITSETI_B LASX256:$xj, uimm3:$imm)>; -+def : Pat<(or (v16i16 LASX256:$xj), (v16i16 (vsplat_uimm_pow2 uimm4:$imm))), -+ (XVBITSETI_H LASX256:$xj, uimm4:$imm)>; -+def : Pat<(or (v8i32 LASX256:$xj), (v8i32 (vsplat_uimm_pow2 uimm5:$imm))), -+ (XVBITSETI_W LASX256:$xj, uimm5:$imm)>; -+def : Pat<(or (v4i64 LASX256:$xj), (v4i64 (vsplat_uimm_pow2 uimm6:$imm))), -+ (XVBITSETI_D LASX256:$xj, uimm6:$imm)>; -+ -+// XVBITREV_{B/H/W/D} -+def : Pat<(xor v32i8:$xj, (shl vsplat_imm_eq_1, v32i8:$xk)), -+ (v32i8 (XVBITREV_B v32i8:$xj, v32i8:$xk))>; -+def : Pat<(xor v16i16:$xj, (shl vsplat_imm_eq_1, v16i16:$xk)), -+ (v16i16 (XVBITREV_H v16i16:$xj, v16i16:$xk))>; -+def : Pat<(xor v8i32:$xj, (shl vsplat_imm_eq_1, v8i32:$xk)), -+ (v8i32 (XVBITREV_W v8i32:$xj, v8i32:$xk))>; -+def : Pat<(xor v4i64:$xj, (shl vsplat_imm_eq_1, v4i64:$xk)), -+ (v4i64 (XVBITREV_D v4i64:$xj, v4i64:$xk))>; -+def : Pat<(xor v32i8:$xj, (shl vsplat_imm_eq_1, (vsplati8imm7 v32i8:$xk))), -+ (v32i8 (XVBITREV_B v32i8:$xj, v32i8:$xk))>; -+def : Pat<(xor v16i16:$xj, (shl vsplat_imm_eq_1, (vsplati16imm15 v16i16:$xk))), -+ (v16i16 (XVBITREV_H v16i16:$xj, v16i16:$xk))>; -+def : Pat<(xor v8i32:$xj, (shl vsplat_imm_eq_1, (vsplati32imm31 v8i32:$xk))), -+ (v8i32 (XVBITREV_W v8i32:$xj, v8i32:$xk))>; -+def : Pat<(xor v4i64:$xj, (shl vsplat_imm_eq_1, (vsplati64imm63 v4i64:$xk))), -+ (v4i64 (XVBITREV_D v4i64:$xj, v4i64:$xk))>; -+ -+// XVBITREVI_{B/H/W/D} -+def : Pat<(xor (v32i8 LASX256:$xj), (v32i8 (vsplat_uimm_pow2 uimm3:$imm))), -+ (XVBITREVI_B LASX256:$xj, uimm3:$imm)>; -+def : Pat<(xor (v16i16 LASX256:$xj), (v16i16 (vsplat_uimm_pow2 uimm4:$imm))), -+ (XVBITREVI_H LASX256:$xj, uimm4:$imm)>; -+def : Pat<(xor (v8i32 LASX256:$xj), (v8i32 (vsplat_uimm_pow2 uimm5:$imm))), -+ (XVBITREVI_W LASX256:$xj, uimm5:$imm)>; -+def : Pat<(xor (v4i64 LASX256:$xj), (v4i64 (vsplat_uimm_pow2 uimm6:$imm))), -+ (XVBITREVI_D LASX256:$xj, uimm6:$imm)>; -+ -+// XVFADD_{S/D} -+defm : PatXrXrF; -+ -+// XVFSUB_{S/D} -+defm : PatXrXrF; -+ -+// XVFMUL_{S/D} -+defm : PatXrXrF; -+ -+// XVFDIV_{S/D} -+defm : PatXrXrF; -+ -+// XVFMADD_{S/D} -+def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa), -+ 
(XVFMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; -+def : Pat<(fma v4f64:$xj, v4f64:$xk, v4f64:$xa), -+ (XVFMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; -+ -+// XVINSGR2VR_{W/D} -+def : Pat<(vector_insert v8i32:$xd, GRLenVT:$rj, uimm3:$imm), -+ (XVINSGR2VR_W v8i32:$xd, GRLenVT:$rj, uimm3:$imm)>; -+def : Pat<(vector_insert v4i64:$xd, GRLenVT:$rj, uimm2:$imm), -+ (XVINSGR2VR_D v4i64:$xd, GRLenVT:$rj, uimm2:$imm)>; -+ -+// XVPICKVE2GR_W[U] -+def : Pat<(loongarch_vpick_sext_elt v8i32:$xd, uimm3:$imm, i32), -+ (XVPICKVE2GR_W v8i32:$xd, uimm3:$imm)>; -+def : Pat<(loongarch_vpick_zext_elt v8i32:$xd, uimm3:$imm, i32), -+ (XVPICKVE2GR_WU v8i32:$xd, uimm3:$imm)>; -+ -+// XVREPLGR2VR_{B/H/W/D} -+def : Pat<(lasxsplati8 GPR:$rj), (XVREPLGR2VR_B GPR:$rj)>; -+def : Pat<(lasxsplati16 GPR:$rj), (XVREPLGR2VR_H GPR:$rj)>; -+def : Pat<(lasxsplati32 GPR:$rj), (XVREPLGR2VR_W GPR:$rj)>; -+def : Pat<(lasxsplati64 GPR:$rj), (XVREPLGR2VR_D GPR:$rj)>; -+ -+// XVREPLVE_{B/H/W/D} -+def : Pat<(loongarch_vreplve v32i8:$xj, GRLenVT:$rk), -+ (XVREPLVE_B v32i8:$xj, GRLenVT:$rk)>; -+def : Pat<(loongarch_vreplve v16i16:$xj, GRLenVT:$rk), -+ (XVREPLVE_H v16i16:$xj, GRLenVT:$rk)>; -+def : Pat<(loongarch_vreplve v8i32:$xj, GRLenVT:$rk), -+ (XVREPLVE_W v8i32:$xj, GRLenVT:$rk)>; -+def : Pat<(loongarch_vreplve v4i64:$xj, GRLenVT:$rk), -+ (XVREPLVE_D v4i64:$xj, GRLenVT:$rk)>; -+ -+// Loads/Stores -+foreach vt = [v32i8, v16i16, v8i32, v4i64] in { -+ defm : LdPat; -+ def : RegRegLdPat; -+ defm : StPat; -+ def : RegRegStPat; -+} -+ -+} // Predicates = [HasExtLASX] -+ -+/// Intrinsic pattern -+ -+class deriveLASXIntrinsic { -+ Intrinsic ret = !cast(!tolower("int_loongarch_lasx_"#Inst)); -+} -+ -+let Predicates = [HasExtLASX] in { -+ -+// vty: v32i8/v16i16/v8i32/v4i64 -+// Pat<(Intrinsic vty:$xj, vty:$xk), -+// (LAInst vty:$xj, vty:$xk)>; -+foreach Inst = ["XVSADD_B", "XVSADD_BU", "XVSSUB_B", "XVSSUB_BU", -+ "XVHADDW_H_B", "XVHADDW_HU_BU", "XVHSUBW_H_B", "XVHSUBW_HU_BU", -+ "XVADDWEV_H_B", "XVADDWOD_H_B", "XVSUBWEV_H_B", "XVSUBWOD_H_B", -+ "XVADDWEV_H_BU", "XVADDWOD_H_BU", "XVSUBWEV_H_BU", "XVSUBWOD_H_BU", -+ "XVADDWEV_H_BU_B", "XVADDWOD_H_BU_B", -+ "XVAVG_B", "XVAVG_BU", "XVAVGR_B", "XVAVGR_BU", -+ "XVABSD_B", "XVABSD_BU", "XVADDA_B", "XVMUH_B", "XVMUH_BU", -+ "XVMULWEV_H_B", "XVMULWOD_H_B", "XVMULWEV_H_BU", "XVMULWOD_H_BU", -+ "XVMULWEV_H_BU_B", "XVMULWOD_H_BU_B", "XVSIGNCOV_B", -+ "XVANDN_V", "XVORN_V", "XVROTR_B", "XVSRLR_B", "XVSRAR_B", -+ "XVSEQ_B", "XVSLE_B", "XVSLE_BU", "XVSLT_B", "XVSLT_BU", -+ "XVPACKEV_B", "XVPACKOD_B", "XVPICKEV_B", "XVPICKOD_B", -+ "XVILVL_B", "XVILVH_B"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), -+ (!cast(Inst) LASX256:$xj, LASX256:$xk)>; -+foreach Inst = ["XVSADD_H", "XVSADD_HU", "XVSSUB_H", "XVSSUB_HU", -+ "XVHADDW_W_H", "XVHADDW_WU_HU", "XVHSUBW_W_H", "XVHSUBW_WU_HU", -+ "XVADDWEV_W_H", "XVADDWOD_W_H", "XVSUBWEV_W_H", "XVSUBWOD_W_H", -+ "XVADDWEV_W_HU", "XVADDWOD_W_HU", "XVSUBWEV_W_HU", "XVSUBWOD_W_HU", -+ "XVADDWEV_W_HU_H", "XVADDWOD_W_HU_H", -+ "XVAVG_H", "XVAVG_HU", "XVAVGR_H", "XVAVGR_HU", -+ "XVABSD_H", "XVABSD_HU", "XVADDA_H", "XVMUH_H", "XVMUH_HU", -+ "XVMULWEV_W_H", "XVMULWOD_W_H", "XVMULWEV_W_HU", "XVMULWOD_W_HU", -+ "XVMULWEV_W_HU_H", "XVMULWOD_W_HU_H", "XVSIGNCOV_H", "XVROTR_H", -+ "XVSRLR_H", "XVSRAR_H", "XVSRLN_B_H", "XVSRAN_B_H", "XVSRLRN_B_H", -+ "XVSRARN_B_H", "XVSSRLN_B_H", "XVSSRAN_B_H", "XVSSRLN_BU_H", -+ "XVSSRAN_BU_H", "XVSSRLRN_B_H", "XVSSRARN_B_H", "XVSSRLRN_BU_H", -+ "XVSSRARN_BU_H", -+ "XVSEQ_H", "XVSLE_H", "XVSLE_HU", "XVSLT_H", "XVSLT_HU", 
-+ "XVPACKEV_H", "XVPACKOD_H", "XVPICKEV_H", "XVPICKOD_H", -+ "XVILVL_H", "XVILVH_H"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)), -+ (!cast(Inst) LASX256:$xj, LASX256:$xk)>; -+foreach Inst = ["XVSADD_W", "XVSADD_WU", "XVSSUB_W", "XVSSUB_WU", -+ "XVHADDW_D_W", "XVHADDW_DU_WU", "XVHSUBW_D_W", "XVHSUBW_DU_WU", -+ "XVADDWEV_D_W", "XVADDWOD_D_W", "XVSUBWEV_D_W", "XVSUBWOD_D_W", -+ "XVADDWEV_D_WU", "XVADDWOD_D_WU", "XVSUBWEV_D_WU", "XVSUBWOD_D_WU", -+ "XVADDWEV_D_WU_W", "XVADDWOD_D_WU_W", -+ "XVAVG_W", "XVAVG_WU", "XVAVGR_W", "XVAVGR_WU", -+ "XVABSD_W", "XVABSD_WU", "XVADDA_W", "XVMUH_W", "XVMUH_WU", -+ "XVMULWEV_D_W", "XVMULWOD_D_W", "XVMULWEV_D_WU", "XVMULWOD_D_WU", -+ "XVMULWEV_D_WU_W", "XVMULWOD_D_WU_W", "XVSIGNCOV_W", "XVROTR_W", -+ "XVSRLR_W", "XVSRAR_W", "XVSRLN_H_W", "XVSRAN_H_W", "XVSRLRN_H_W", -+ "XVSRARN_H_W", "XVSSRLN_H_W", "XVSSRAN_H_W", "XVSSRLN_HU_W", -+ "XVSSRAN_HU_W", "XVSSRLRN_H_W", "XVSSRARN_H_W", "XVSSRLRN_HU_W", -+ "XVSSRARN_HU_W", -+ "XVSEQ_W", "XVSLE_W", "XVSLE_WU", "XVSLT_W", "XVSLT_WU", -+ "XVPACKEV_W", "XVPACKOD_W", "XVPICKEV_W", "XVPICKOD_W", -+ "XVILVL_W", "XVILVH_W", "XVPERM_W"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)), -+ (!cast(Inst) LASX256:$xj, LASX256:$xk)>; -+foreach Inst = ["XVADD_Q", "XVSUB_Q", -+ "XVSADD_D", "XVSADD_DU", "XVSSUB_D", "XVSSUB_DU", -+ "XVHADDW_Q_D", "XVHADDW_QU_DU", "XVHSUBW_Q_D", "XVHSUBW_QU_DU", -+ "XVADDWEV_Q_D", "XVADDWOD_Q_D", "XVSUBWEV_Q_D", "XVSUBWOD_Q_D", -+ "XVADDWEV_Q_DU", "XVADDWOD_Q_DU", "XVSUBWEV_Q_DU", "XVSUBWOD_Q_DU", -+ "XVADDWEV_Q_DU_D", "XVADDWOD_Q_DU_D", -+ "XVAVG_D", "XVAVG_DU", "XVAVGR_D", "XVAVGR_DU", -+ "XVABSD_D", "XVABSD_DU", "XVADDA_D", "XVMUH_D", "XVMUH_DU", -+ "XVMULWEV_Q_D", "XVMULWOD_Q_D", "XVMULWEV_Q_DU", "XVMULWOD_Q_DU", -+ "XVMULWEV_Q_DU_D", "XVMULWOD_Q_DU_D", "XVSIGNCOV_D", "XVROTR_D", -+ "XVSRLR_D", "XVSRAR_D", "XVSRLN_W_D", "XVSRAN_W_D", "XVSRLRN_W_D", -+ "XVSRARN_W_D", "XVSSRLN_W_D", "XVSSRAN_W_D", "XVSSRLN_WU_D", -+ "XVSSRAN_WU_D", "XVSSRLRN_W_D", "XVSSRARN_W_D", "XVSSRLRN_WU_D", -+ "XVSSRARN_WU_D", "XVFFINT_S_L", -+ "XVSEQ_D", "XVSLE_D", "XVSLE_DU", "XVSLT_D", "XVSLT_DU", -+ "XVPACKEV_D", "XVPACKOD_D", "XVPICKEV_D", "XVPICKOD_D", -+ "XVILVL_D", "XVILVH_D"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)), -+ (!cast(Inst) LASX256:$xj, LASX256:$xk)>; -+ -+// vty: v32i8/v16i16/v8i32/v4i64 -+// Pat<(Intrinsic vty:$xd, vty:$xj, vty:$xk), -+// (LAInst vty:$xd, vty:$xj, vty:$xk)>; -+foreach Inst = ["XVMADDWEV_H_B", "XVMADDWOD_H_B", "XVMADDWEV_H_BU", -+ "XVMADDWOD_H_BU", "XVMADDWEV_H_BU_B", "XVMADDWOD_H_BU_B"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v16i16 LASX256:$xd), (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), -+ (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; -+foreach Inst = ["XVMADDWEV_W_H", "XVMADDWOD_W_H", "XVMADDWEV_W_HU", -+ "XVMADDWOD_W_HU", "XVMADDWEV_W_HU_H", "XVMADDWOD_W_HU_H"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v8i32 LASX256:$xd), (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)), -+ (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; -+foreach Inst = ["XVMADDWEV_D_W", "XVMADDWOD_D_W", "XVMADDWEV_D_WU", -+ "XVMADDWOD_D_WU", "XVMADDWEV_D_WU_W", "XVMADDWOD_D_WU_W"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v4i64 LASX256:$xd), (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)), -+ (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; -+foreach Inst = ["XVMADDWEV_Q_D", "XVMADDWOD_Q_D", "XVMADDWEV_Q_DU", -+ "XVMADDWOD_Q_DU", "XVMADDWEV_Q_DU_D", "XVMADDWOD_Q_DU_D"] in -+ 
def : Pat<(deriveLASXIntrinsic.ret -+ (v4i64 LASX256:$xd), (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)), -+ (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; -+ -+// vty: v32i8/v16i16/v8i32/v4i64 -+// Pat<(Intrinsic vty:$xj), -+// (LAInst vty:$xj)>; -+foreach Inst = ["XVEXTH_H_B", "XVEXTH_HU_BU", -+ "XVMSKLTZ_B", "XVMSKGEZ_B", "XVMSKNZ_B", -+ "XVCLO_B", "XVCLZ_B", "VEXT2XV_H_B", "VEXT2XV_HU_BU", -+ "VEXT2XV_W_B", "VEXT2XV_WU_BU", "VEXT2XV_D_B", -+ "VEXT2XV_DU_BU", "XVREPLVE0_B", "XVREPLVE0_Q"] in -+ def : Pat<(deriveLASXIntrinsic.ret (v32i8 LASX256:$xj)), -+ (!cast(Inst) LASX256:$xj)>; -+foreach Inst = ["XVEXTH_W_H", "XVEXTH_WU_HU", "XVMSKLTZ_H", -+ "XVCLO_H", "XVCLZ_H", "XVFCVTL_S_H", "XVFCVTH_S_H", -+ "VEXT2XV_W_H", "VEXT2XV_WU_HU", "VEXT2XV_D_H", -+ "VEXT2XV_DU_HU", "XVREPLVE0_H"] in -+ def : Pat<(deriveLASXIntrinsic.ret (v16i16 LASX256:$xj)), -+ (!cast(Inst) LASX256:$xj)>; -+foreach Inst = ["XVEXTH_D_W", "XVEXTH_DU_WU", "XVMSKLTZ_W", -+ "XVCLO_W", "XVCLZ_W", "XVFFINT_S_W", "XVFFINT_S_WU", -+ "XVFFINTL_D_W", "XVFFINTH_D_W", -+ "VEXT2XV_D_W", "VEXT2XV_DU_WU", "XVREPLVE0_W"] in -+ def : Pat<(deriveLASXIntrinsic.ret (v8i32 LASX256:$xj)), -+ (!cast(Inst) LASX256:$xj)>; -+foreach Inst = ["XVEXTH_Q_D", "XVEXTH_QU_DU", "XVMSKLTZ_D", -+ "XVEXTL_Q_D", "XVEXTL_QU_DU", -+ "XVCLO_D", "XVCLZ_D", "XVFFINT_D_L", "XVFFINT_D_LU", -+ "XVREPLVE0_D"] in -+ def : Pat<(deriveLASXIntrinsic.ret (v4i64 LASX256:$xj)), -+ (!cast(Inst) LASX256:$xj)>; -+ -+// Pat<(Intrinsic timm:$imm) -+// (LAInst timm:$imm)>; -+def : Pat<(int_loongarch_lasx_xvldi timm:$imm), -+ (XVLDI (to_valide_timm timm:$imm))>; -+foreach Inst = ["XVREPLI_B", "XVREPLI_H", "XVREPLI_W", "XVREPLI_D"] in -+ def : Pat<(deriveLASXIntrinsic.ret timm:$imm), -+ (!cast("Pseudo"#Inst) (to_valide_timm timm:$imm))>; -+ -+// vty: v32i8/v16i16/v8i32/v4i64 -+// Pat<(Intrinsic vty:$xj, timm:$imm) -+// (LAInst vty:$xj, timm:$imm)>; -+foreach Inst = ["XVSAT_B", "XVSAT_BU", "XVNORI_B", "XVROTRI_B", "XVSLLWIL_H_B", -+ "XVSLLWIL_HU_BU", "XVSRLRI_B", "XVSRARI_B", -+ "XVSEQI_B", "XVSLEI_B", "XVSLEI_BU", "XVSLTI_B", "XVSLTI_BU", -+ "XVREPL128VEI_B", "XVBSLL_V", "XVBSRL_V", "XVSHUF4I_B"] in -+ def : Pat<(deriveLASXIntrinsic.ret (v32i8 LASX256:$xj), timm:$imm), -+ (!cast(Inst) LASX256:$xj, (to_valide_timm timm:$imm))>; -+foreach Inst = ["XVSAT_H", "XVSAT_HU", "XVROTRI_H", "XVSLLWIL_W_H", -+ "XVSLLWIL_WU_HU", "XVSRLRI_H", "XVSRARI_H", -+ "XVSEQI_H", "XVSLEI_H", "XVSLEI_HU", "XVSLTI_H", "XVSLTI_HU", -+ "XVREPL128VEI_H", "XVSHUF4I_H"] in -+ def : Pat<(deriveLASXIntrinsic.ret (v16i16 LASX256:$xj), timm:$imm), -+ (!cast(Inst) LASX256:$xj, (to_valide_timm timm:$imm))>; -+foreach Inst = ["XVSAT_W", "XVSAT_WU", "XVROTRI_W", "XVSLLWIL_D_W", -+ "XVSLLWIL_DU_WU", "XVSRLRI_W", "XVSRARI_W", -+ "XVSEQI_W", "XVSLEI_W", "XVSLEI_WU", "XVSLTI_W", "XVSLTI_WU", -+ "XVREPL128VEI_W", "XVSHUF4I_W", "XVPICKVE_W"] in -+ def : Pat<(deriveLASXIntrinsic.ret (v8i32 LASX256:$xj), timm:$imm), -+ (!cast(Inst) LASX256:$xj, (to_valide_timm timm:$imm))>; -+foreach Inst = ["XVSAT_D", "XVSAT_DU", "XVROTRI_D", "XVSRLRI_D", "XVSRARI_D", -+ "XVSEQI_D", "XVSLEI_D", "XVSLEI_DU", "XVSLTI_D", "XVSLTI_DU", -+ "XVPICKVE2GR_D", "XVPICKVE2GR_DU", -+ "XVREPL128VEI_D", "XVPERMI_D", "XVPICKVE_D"] in -+ def : Pat<(deriveLASXIntrinsic.ret (v4i64 LASX256:$xj), timm:$imm), -+ (!cast(Inst) LASX256:$xj, (to_valide_timm timm:$imm))>; -+ -+// vty: v32i8/v16i16/v8i32/v4i64 -+// Pat<(Intrinsic vty:$xd, vty:$xj, timm:$imm) -+// (LAInst vty:$xd, vty:$xj, timm:$imm)>; -+foreach Inst = ["XVSRLNI_B_H", "XVSRANI_B_H", "XVSRLRNI_B_H", 
"XVSRARNI_B_H", -+ "XVSSRLNI_B_H", "XVSSRANI_B_H", "XVSSRLNI_BU_H", "XVSSRANI_BU_H", -+ "XVSSRLRNI_B_H", "XVSSRARNI_B_H", "XVSSRLRNI_BU_H", "XVSSRARNI_BU_H", -+ "XVFRSTPI_B", "XVBITSELI_B", "XVEXTRINS_B", "XVPERMI_Q"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v32i8 LASX256:$xd), (v32i8 LASX256:$xj), timm:$imm), -+ (!cast(Inst) LASX256:$xd, LASX256:$xj, -+ (to_valide_timm timm:$imm))>; -+foreach Inst = ["XVSRLNI_H_W", "XVSRANI_H_W", "XVSRLRNI_H_W", "XVSRARNI_H_W", -+ "XVSSRLNI_H_W", "XVSSRANI_H_W", "XVSSRLNI_HU_W", "XVSSRANI_HU_W", -+ "XVSSRLRNI_H_W", "XVSSRARNI_H_W", "XVSSRLRNI_HU_W", "XVSSRARNI_HU_W", -+ "XVFRSTPI_H", "XVEXTRINS_H"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v16i16 LASX256:$xd), (v16i16 LASX256:$xj), timm:$imm), -+ (!cast(Inst) LASX256:$xd, LASX256:$xj, -+ (to_valide_timm timm:$imm))>; -+foreach Inst = ["XVSRLNI_W_D", "XVSRANI_W_D", "XVSRLRNI_W_D", "XVSRARNI_W_D", -+ "XVSSRLNI_W_D", "XVSSRANI_W_D", "XVSSRLNI_WU_D", "XVSSRANI_WU_D", -+ "XVSSRLRNI_W_D", "XVSSRARNI_W_D", "XVSSRLRNI_WU_D", "XVSSRARNI_WU_D", -+ "XVPERMI_W", "XVEXTRINS_W", "XVINSVE0_W"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v8i32 LASX256:$xd), (v8i32 LASX256:$xj), timm:$imm), -+ (!cast(Inst) LASX256:$xd, LASX256:$xj, -+ (to_valide_timm timm:$imm))>; -+foreach Inst = ["XVSRLNI_D_Q", "XVSRANI_D_Q", "XVSRLRNI_D_Q", "XVSRARNI_D_Q", -+ "XVSSRLNI_D_Q", "XVSSRANI_D_Q", "XVSSRLNI_DU_Q", "XVSSRANI_DU_Q", -+ "XVSSRLRNI_D_Q", "XVSSRARNI_D_Q", "XVSSRLRNI_DU_Q", "XVSSRARNI_DU_Q", -+ "XVSHUF4I_D", "XVEXTRINS_D", "XVINSVE0_D"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v4i64 LASX256:$xd), (v4i64 LASX256:$xj), timm:$imm), -+ (!cast(Inst) LASX256:$xd, LASX256:$xj, -+ (to_valide_timm timm:$imm))>; -+ -+// vty: v32i8/v16i16/v8i32/v4i64 -+// Pat<(Intrinsic vty:$xd, vty:$xj, vty:$xk), -+// (LAInst vty:$xd, vty:$xj, vty:$xk)>; -+foreach Inst = ["XVFRSTP_B", "XVBITSEL_V", "XVSHUF_B"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v32i8 LASX256:$xd), (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), -+ (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; -+foreach Inst = ["XVFRSTP_H", "XVSHUF_H"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v16i16 LASX256:$xd), (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)), -+ (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; -+def : Pat<(int_loongarch_lasx_xvshuf_w (v8i32 LASX256:$xd), (v8i32 LASX256:$xj), -+ (v8i32 LASX256:$xk)), -+ (XVSHUF_W LASX256:$xd, LASX256:$xj, LASX256:$xk)>; -+def : Pat<(int_loongarch_lasx_xvshuf_d (v4i64 LASX256:$xd), (v4i64 LASX256:$xj), -+ (v4i64 LASX256:$xk)), -+ (XVSHUF_D LASX256:$xd, LASX256:$xj, LASX256:$xk)>; -+ -+// vty: v8f32/v4f64 -+// Pat<(Intrinsic vty:$xj, vty:$xk, vty:$xa), -+// (LAInst vty:$xj, vty:$xk, vty:$xa)>; -+foreach Inst = ["XVFMSUB_S", "XVFNMADD_S", "XVFNMSUB_S"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v8f32 LASX256:$xj), (v8f32 LASX256:$xk), (v8f32 LASX256:$xa)), -+ (!cast(Inst) LASX256:$xj, LASX256:$xk, LASX256:$xa)>; -+foreach Inst = ["XVFMSUB_D", "XVFNMADD_D", "XVFNMSUB_D"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v4f64 LASX256:$xj), (v4f64 LASX256:$xk), (v4f64 LASX256:$xa)), -+ (!cast(Inst) LASX256:$xj, LASX256:$xk, LASX256:$xa)>; -+ -+// vty: v8f32/v4f64 -+// Pat<(Intrinsic vty:$xj, vty:$xk), -+// (LAInst vty:$xj, vty:$xk)>; -+foreach Inst = ["XVFMAX_S", "XVFMIN_S", "XVFMAXA_S", "XVFMINA_S", "XVFCVT_H_S", -+ "XVFCMP_CAF_S", "XVFCMP_CUN_S", "XVFCMP_CEQ_S", "XVFCMP_CUEQ_S", -+ "XVFCMP_CLT_S", "XVFCMP_CULT_S", "XVFCMP_CLE_S", "XVFCMP_CULE_S", -+ "XVFCMP_CNE_S", "XVFCMP_COR_S", "XVFCMP_CUNE_S", -+ "XVFCMP_SAF_S", "XVFCMP_SUN_S", 
"XVFCMP_SEQ_S", "XVFCMP_SUEQ_S", -+ "XVFCMP_SLT_S", "XVFCMP_SULT_S", "XVFCMP_SLE_S", "XVFCMP_SULE_S", -+ "XVFCMP_SNE_S", "XVFCMP_SOR_S", "XVFCMP_SUNE_S"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v8f32 LASX256:$xj), (v8f32 LASX256:$xk)), -+ (!cast(Inst) LASX256:$xj, LASX256:$xk)>; -+foreach Inst = ["XVFMAX_D", "XVFMIN_D", "XVFMAXA_D", "XVFMINA_D", "XVFCVT_S_D", -+ "XVFTINTRNE_W_D", "XVFTINTRZ_W_D", "XVFTINTRP_W_D", "XVFTINTRM_W_D", -+ "XVFTINT_W_D", -+ "XVFCMP_CAF_D", "XVFCMP_CUN_D", "XVFCMP_CEQ_D", "XVFCMP_CUEQ_D", -+ "XVFCMP_CLT_D", "XVFCMP_CULT_D", "XVFCMP_CLE_D", "XVFCMP_CULE_D", -+ "XVFCMP_CNE_D", "XVFCMP_COR_D", "XVFCMP_CUNE_D", -+ "XVFCMP_SAF_D", "XVFCMP_SUN_D", "XVFCMP_SEQ_D", "XVFCMP_SUEQ_D", -+ "XVFCMP_SLT_D", "XVFCMP_SULT_D", "XVFCMP_SLE_D", "XVFCMP_SULE_D", -+ "XVFCMP_SNE_D", "XVFCMP_SOR_D", "XVFCMP_SUNE_D"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v4f64 LASX256:$xj), (v4f64 LASX256:$xk)), -+ (!cast(Inst) LASX256:$xj, LASX256:$xk)>; -+ -+// vty: v8f32/v4f64 -+// Pat<(Intrinsic vty:$xj), -+// (LAInst vty:$xj)>; -+foreach Inst = ["XVFLOGB_S", "XVFCLASS_S", "XVFSQRT_S", "XVFRECIP_S", "XVFRSQRT_S", -+ "XVFRINT_S", "XVFCVTL_D_S", "XVFCVTH_D_S", -+ "XVFRINTRNE_S", "XVFRINTRZ_S", "XVFRINTRP_S", "XVFRINTRM_S", -+ "XVFTINTRNE_W_S", "XVFTINTRZ_W_S", "XVFTINTRP_W_S", "XVFTINTRM_W_S", -+ "XVFTINT_W_S", "XVFTINTRZ_WU_S", "XVFTINT_WU_S", -+ "XVFTINTRNEL_L_S", "XVFTINTRNEH_L_S", "XVFTINTRZL_L_S", -+ "XVFTINTRZH_L_S", "XVFTINTRPL_L_S", "XVFTINTRPH_L_S", -+ "XVFTINTRML_L_S", "XVFTINTRMH_L_S", "XVFTINTL_L_S", -+ "XVFTINTH_L_S"] in -+ def : Pat<(deriveLASXIntrinsic.ret (v8f32 LASX256:$xj)), -+ (!cast(Inst) LASX256:$xj)>; -+foreach Inst = ["XVFLOGB_D", "XVFCLASS_D", "XVFSQRT_D", "XVFRECIP_D", "XVFRSQRT_D", -+ "XVFRINT_D", -+ "XVFRINTRNE_D", "XVFRINTRZ_D", "XVFRINTRP_D", "XVFRINTRM_D", -+ "XVFTINTRNE_L_D", "XVFTINTRZ_L_D", "XVFTINTRP_L_D", "XVFTINTRM_L_D", -+ "XVFTINT_L_D", "XVFTINTRZ_LU_D", "XVFTINT_LU_D"] in -+ def : Pat<(deriveLASXIntrinsic.ret (v4f64 LASX256:$xj)), -+ (!cast(Inst) LASX256:$xj)>; -+ -+def : Pat<(int_loongarch_lasx_xvpickve_w_f v8f32:$xj, timm:$imm), -+ (XVPICKVE_W v8f32:$xj, (to_valide_timm timm:$imm))>; -+def : Pat<(int_loongarch_lasx_xvpickve_d_f v4f64:$xj, timm:$imm), -+ (XVPICKVE_D v4f64:$xj, (to_valide_timm timm:$imm))>; -+ -+// load -+def : Pat<(int_loongarch_lasx_xvld GPR:$rj, timm:$imm), -+ (XVLD GPR:$rj, (to_valide_timm timm:$imm))>; -+def : Pat<(int_loongarch_lasx_xvldx GPR:$rj, GPR:$rk), -+ (XVLDX GPR:$rj, GPR:$rk)>; -+ -+def : Pat<(int_loongarch_lasx_xvldrepl_b GPR:$rj, timm:$imm), -+ (XVLDREPL_B GPR:$rj, (to_valide_timm timm:$imm))>; -+def : Pat<(int_loongarch_lasx_xvldrepl_h GPR:$rj, timm:$imm), -+ (XVLDREPL_H GPR:$rj, (to_valide_timm timm:$imm))>; -+def : Pat<(int_loongarch_lasx_xvldrepl_w GPR:$rj, timm:$imm), -+ (XVLDREPL_W GPR:$rj, (to_valide_timm timm:$imm))>; -+def : Pat<(int_loongarch_lasx_xvldrepl_d GPR:$rj, timm:$imm), -+ (XVLDREPL_D GPR:$rj, (to_valide_timm timm:$imm))>; -+ -+// store -+def : Pat<(int_loongarch_lasx_xvst LASX256:$xd, GPR:$rj, timm:$imm), -+ (XVST LASX256:$xd, GPR:$rj, (to_valide_timm timm:$imm))>; -+def : Pat<(int_loongarch_lasx_xvstx LASX256:$xd, GPR:$rj, GPR:$rk), -+ (XVSTX LASX256:$xd, GPR:$rj, GPR:$rk)>; -+ -+def : Pat<(int_loongarch_lasx_xvstelm_b v32i8:$xd, GPR:$rj, timm:$imm, timm:$idx), -+ (XVSTELM_B v32i8:$xd, GPR:$rj, (to_valide_timm timm:$imm), -+ (to_valide_timm timm:$idx))>; -+def : Pat<(int_loongarch_lasx_xvstelm_h v16i16:$xd, GPR:$rj, timm:$imm, timm:$idx), -+ (XVSTELM_H v16i16:$xd, GPR:$rj, (to_valide_timm 
timm:$imm),
-+ (to_valide_timm timm:$idx))>;
-+def : Pat<(int_loongarch_lasx_xvstelm_w v8i32:$xd, GPR:$rj, timm:$imm, timm:$idx),
-+ (XVSTELM_W v8i32:$xd, GPR:$rj, (to_valide_timm timm:$imm),
-+ (to_valide_timm timm:$idx))>;
-+def : Pat<(int_loongarch_lasx_xvstelm_d v4i64:$xd, GPR:$rj, timm:$imm, timm:$idx),
-+ (XVSTELM_D v4i64:$xd, GPR:$rj, (to_valide_timm timm:$imm),
-+ (to_valide_timm timm:$idx))>;
-+
- } // Predicates = [HasExtLASX]
---
-2.20.1
-
-
-From 76928242b8b8e6228d1b1ec80c69b61c94d6ec79 Mon Sep 17 00:00:00 2001
-From: chenli
-Date: Sat, 19 Aug 2023 17:10:41 +0800
-Subject: [PATCH 04/35] [LoongArch] Add LSX intrinsic testcases
-
-Depends on D155829
-
-Reviewed By: SixWeining
-
-Differential Revision: https://reviews.llvm.org/D155834
-
-(cherry picked from commit f3aa4416319aed198841401c6c9dc2e49afe2507)
---
- .../CodeGen/LoongArch/lsx/intrinsic-absd.ll | 98 ++++
- .../CodeGen/LoongArch/lsx/intrinsic-add.ll | 62 ++
- .../CodeGen/LoongArch/lsx/intrinsic-adda.ll | 50 ++
- .../CodeGen/LoongArch/lsx/intrinsic-addi.ll | 50 ++
- .../CodeGen/LoongArch/lsx/intrinsic-addw.ll | 290 ++++++++++
- .../CodeGen/LoongArch/lsx/intrinsic-and.ll | 14 +
- .../CodeGen/LoongArch/lsx/intrinsic-andi.ll | 14 +
- .../CodeGen/LoongArch/lsx/intrinsic-andn.ll | 14 +
- .../CodeGen/LoongArch/lsx/intrinsic-avg.ll | 98 ++++
- .../CodeGen/LoongArch/lsx/intrinsic-avgr.ll | 98 ++++
- .../CodeGen/LoongArch/lsx/intrinsic-bitclr.ll | 98 ++++
- .../CodeGen/LoongArch/lsx/intrinsic-bitrev.ll | 98 ++++
- .../CodeGen/LoongArch/lsx/intrinsic-bitsel.ll | 14 +
- .../LoongArch/lsx/intrinsic-bitseli.ll | 14 +
- .../CodeGen/LoongArch/lsx/intrinsic-bitset.ll | 98 ++++
- .../CodeGen/LoongArch/lsx/intrinsic-bsll.ll | 14 +
- .../CodeGen/LoongArch/lsx/intrinsic-bsrl.ll | 14 +
- .../CodeGen/LoongArch/lsx/intrinsic-clo.ll | 50 ++
- .../CodeGen/LoongArch/lsx/intrinsic-clz.ll | 50 ++
- .../CodeGen/LoongArch/lsx/intrinsic-div.ll | 98 ++++
- .../CodeGen/LoongArch/lsx/intrinsic-exth.ll | 98 ++++
- .../CodeGen/LoongArch/lsx/intrinsic-extl.ll | 26 +
- .../LoongArch/lsx/intrinsic-extrins.ll | 50 ++
- .../CodeGen/LoongArch/lsx/intrinsic-fadd.ll | 26 +
- .../CodeGen/LoongArch/lsx/intrinsic-fclass.ll | 26 +
- .../CodeGen/LoongArch/lsx/intrinsic-fcmp.ll | 530 ++++++++++++++++++
- .../CodeGen/LoongArch/lsx/intrinsic-fcvt.ll | 26 +
- .../CodeGen/LoongArch/lsx/intrinsic-fcvth.ll | 26 +
- .../CodeGen/LoongArch/lsx/intrinsic-fcvtl.ll | 26 +
- .../CodeGen/LoongArch/lsx/intrinsic-fdiv.ll | 26 +
- .../CodeGen/LoongArch/lsx/intrinsic-ffint.ll | 86 +++
- .../CodeGen/LoongArch/lsx/intrinsic-flogb.ll | 26 +
- .../CodeGen/LoongArch/lsx/intrinsic-fmadd.ll | 26 +
- .../CodeGen/LoongArch/lsx/intrinsic-fmax.ll | 26 +
- .../CodeGen/LoongArch/lsx/intrinsic-fmaxa.ll | 26 +
- .../CodeGen/LoongArch/lsx/intrinsic-fmin.ll | 26 +
- .../CodeGen/LoongArch/lsx/intrinsic-fmina.ll | 26 +
- .../CodeGen/LoongArch/lsx/intrinsic-fmsub.ll | 26 +
- .../CodeGen/LoongArch/lsx/intrinsic-fmul.ll | 26 +
- .../CodeGen/LoongArch/lsx/intrinsic-fnmadd.ll | 26 +
- .../CodeGen/LoongArch/lsx/intrinsic-fnmsub.ll | 26 +
- .../CodeGen/LoongArch/lsx/intrinsic-frecip.ll | 26 +
- .../CodeGen/LoongArch/lsx/intrinsic-frint.ll | 122 ++++
- .../CodeGen/LoongArch/lsx/intrinsic-frsqrt.ll | 26 +
- .../CodeGen/LoongArch/lsx/intrinsic-frstp.ll | 50 ++
- .../CodeGen/LoongArch/lsx/intrinsic-fsqrt.ll | 26 +
- .../CodeGen/LoongArch/lsx/intrinsic-fsub.ll | 26 +
- .../CodeGen/LoongArch/lsx/intrinsic-ftint.ll | 350 ++++++++++++
- .../CodeGen/LoongArch/lsx/intrinsic-haddw.ll | 98 ++++
-
- .../CodeGen/LoongArch/lsx/intrinsic-hsubw.ll | 98 ++++
- .../CodeGen/LoongArch/lsx/intrinsic-ilv.ll | 98 ++++
- .../LoongArch/lsx/intrinsic-insgr2vr.ll | 54 ++
- .../CodeGen/LoongArch/lsx/intrinsic-ld.ll | 26 +
- .../CodeGen/LoongArch/lsx/intrinsic-ldi.ll | 62 ++
- .../CodeGen/LoongArch/lsx/intrinsic-ldrepl.ll | 50 ++
- .../CodeGen/LoongArch/lsx/intrinsic-madd.ll | 50 ++
- .../CodeGen/LoongArch/lsx/intrinsic-maddw.ll | 290 ++++++++++
- .../CodeGen/LoongArch/lsx/intrinsic-max.ll | 194 +++++++
- .../CodeGen/LoongArch/lsx/intrinsic-min.ll | 194 +++++++
- .../CodeGen/LoongArch/lsx/intrinsic-mod.ll | 98 ++++
- .../CodeGen/LoongArch/lsx/intrinsic-mskgez.ll | 14 +
- .../CodeGen/LoongArch/lsx/intrinsic-mskltz.ll | 50 ++
- .../CodeGen/LoongArch/lsx/intrinsic-msknz.ll | 14 +
- .../CodeGen/LoongArch/lsx/intrinsic-msub.ll | 50 ++
- .../CodeGen/LoongArch/lsx/intrinsic-muh.ll | 98 ++++
- .../CodeGen/LoongArch/lsx/intrinsic-mul.ll | 50 ++
- .../CodeGen/LoongArch/lsx/intrinsic-mulw.ll | 290 ++++++++++
- .../CodeGen/LoongArch/lsx/intrinsic-neg.ll | 50 ++
- .../CodeGen/LoongArch/lsx/intrinsic-nor.ll | 14 +
- .../CodeGen/LoongArch/lsx/intrinsic-nori.ll | 14 +
- .../CodeGen/LoongArch/lsx/intrinsic-or.ll | 14 +
- .../CodeGen/LoongArch/lsx/intrinsic-ori.ll | 14 +
- .../CodeGen/LoongArch/lsx/intrinsic-orn.ll | 14 +
- .../CodeGen/LoongArch/lsx/intrinsic-pack.ll | 98 ++++
- .../CodeGen/LoongArch/lsx/intrinsic-pcnt.ll | 50 ++
- .../CodeGen/LoongArch/lsx/intrinsic-permi.ll | 14 +
- .../CodeGen/LoongArch/lsx/intrinsic-pick.ll | 98 ++++
- .../LoongArch/lsx/intrinsic-pickve2gr.ll | 98 ++++
- .../LoongArch/lsx/intrinsic-replgr2vr.ll | 50 ++
- .../CodeGen/LoongArch/lsx/intrinsic-replve.ll | 50 ++
- .../LoongArch/lsx/intrinsic-replvei.ll | 50 ++
- .../CodeGen/LoongArch/lsx/intrinsic-rotr.ll | 98 ++++
- .../CodeGen/LoongArch/lsx/intrinsic-sadd.ll | 98 ++++
- .../CodeGen/LoongArch/lsx/intrinsic-sat.ll | 98 ++++
- .../CodeGen/LoongArch/lsx/intrinsic-seq.ll | 98 ++++
- .../CodeGen/LoongArch/lsx/intrinsic-set.ll | 38 ++
- .../LoongArch/lsx/intrinsic-setallnez.ll | 74 +++
- .../LoongArch/lsx/intrinsic-setanyeqz.ll | 74 +++
- .../CodeGen/LoongArch/lsx/intrinsic-shuf.ll | 50 ++
- .../CodeGen/LoongArch/lsx/intrinsic-shuf4i.ll | 50 ++
- .../LoongArch/lsx/intrinsic-signcov.ll | 50 ++
- .../CodeGen/LoongArch/lsx/intrinsic-sle.ll | 194 +++++++
- .../CodeGen/LoongArch/lsx/intrinsic-sll.ll | 98 ++++
- .../CodeGen/LoongArch/lsx/intrinsic-sllwil.ll | 74 +++
- .../CodeGen/LoongArch/lsx/intrinsic-slt.ll | 194 +++++++
- .../CodeGen/LoongArch/lsx/intrinsic-sra.ll | 98 ++++
- .../CodeGen/LoongArch/lsx/intrinsic-sran.ll | 38 ++
- .../CodeGen/LoongArch/lsx/intrinsic-srani.ll | 50 ++
- .../CodeGen/LoongArch/lsx/intrinsic-srar.ll | 98 ++++
- .../CodeGen/LoongArch/lsx/intrinsic-srarn.ll | 38 ++
- .../CodeGen/LoongArch/lsx/intrinsic-srarni.ll | 50 ++
- .../CodeGen/LoongArch/lsx/intrinsic-srl.ll | 98 ++++
- .../CodeGen/LoongArch/lsx/intrinsic-srln.ll | 38 ++
- .../CodeGen/LoongArch/lsx/intrinsic-srlni.ll | 50 ++
- .../CodeGen/LoongArch/lsx/intrinsic-srlr.ll | 98 ++++
- .../CodeGen/LoongArch/lsx/intrinsic-srlrn.ll | 38 ++
- .../CodeGen/LoongArch/lsx/intrinsic-srlrni.ll | 50 ++
- .../CodeGen/LoongArch/lsx/intrinsic-ssran.ll | 74 +++
- .../CodeGen/LoongArch/lsx/intrinsic-ssrani.ll | 98 ++++
- .../CodeGen/LoongArch/lsx/intrinsic-ssrarn.ll | 74 +++
- .../LoongArch/lsx/intrinsic-ssrarni.ll | 98 ++++
- .../CodeGen/LoongArch/lsx/intrinsic-ssrln.ll | 74 +++
- .../CodeGen/LoongArch/lsx/intrinsic-ssrlni.ll | 98 ++++
- .../CodeGen/LoongArch/lsx/intrinsic-ssrlrn.ll | 74 +++
- .../LoongArch/lsx/intrinsic-ssrlrni.ll | 98 ++++
- .../CodeGen/LoongArch/lsx/intrinsic-ssub.ll | 98 ++++
- .../CodeGen/LoongArch/lsx/intrinsic-st.ll | 26 +
- .../CodeGen/LoongArch/lsx/intrinsic-stelm.ll | 50 ++
- .../CodeGen/LoongArch/lsx/intrinsic-sub.ll | 62 ++
- .../CodeGen/LoongArch/lsx/intrinsic-subi.ll | 50 ++
- .../CodeGen/LoongArch/lsx/intrinsic-subw.ll | 194 +++++++
- .../CodeGen/LoongArch/lsx/intrinsic-xor.ll | 14 +
- .../CodeGen/LoongArch/lsx/intrinsic-xori.ll | 14 +
- 123 files changed, 8902 insertions(+)
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-absd.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-add.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-adda.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-addw.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-and.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-andn.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-avg.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-avgr.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitsel.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-clo.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-clz.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-div.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-exth.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-extl.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fadd.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fclass.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcmp.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvt.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvth.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvtl.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fdiv.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ffint.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-flogb.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmadd.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmax.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmaxa.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmin.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmina.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmsub.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmul.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmadd.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmsub.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecip.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-frint.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrt.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsqrt.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsub.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ftint.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-haddw.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-hsubw.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ilv.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-madd.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-maddw.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-max.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-min.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-mod.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskgez.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskltz.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-msknz.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-msub.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-muh.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-mul.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-mulw.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-neg.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-nor.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-or.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-orn.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-pack.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-pcnt.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-pick.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-replgr2vr.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-replve.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sadd.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-set.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-setallnez.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-setanyeqz.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf.ll
- create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i.ll
- create mode 100644
llvm/test/CodeGen/LoongArch/lsx/intrinsic-signcov.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sran.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarn.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srln.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrn.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssran.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarn.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrln.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrn.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssub.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-st.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sub.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-subw.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-xor.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori.ll - -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-absd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-absd.ll -new file mode 100644 -index 000000000000..811d9d712de4 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-absd.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vabsd_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vabsd_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vabsd.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vabsd_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vabsd_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vabsd.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> 
@llvm.loongarch.lsx.vabsd.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vabsd_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vabsd_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vabsd.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vabsd_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vabsd_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vabsd.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vabsd_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vabsd_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vabsd.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vabsd_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vabsd_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vabsd.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vabsd_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vabsd_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vabsd.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vabsd_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vabsd_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vabsd.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-add.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-add.ll -new file mode 100644 -index 000000000000..fac16c8308da ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-add.ll -@@ -0,0 +1,62 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vadd_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vadd_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vadd_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vadd_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32>, <4 
x i32>) -+ -+define <4 x i32> @lsx_vadd_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vadd_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vadd_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vadd_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vadd_q(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vadd_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vadd.q $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-adda.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-adda.ll -new file mode 100644 -index 000000000000..79be0a184bfb ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-adda.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vadda_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vadda_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vadda.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vadda_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vadda_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vadda.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vadda_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vadda_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vadda.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vadda_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vadda_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vadda.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi.ll -new file mode 100644 -index 000000000000..b9134e0724fe ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8>, i32) -+ -+define <16 x i8> 
@lsx_vaddi_bu(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vaddi_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddi.bu $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> %va, i32 31) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vaddi_hu(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vaddi_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddi.hu $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> %va, i32 31) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vaddi_wu(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vaddi_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddi.wu $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> %va, i32 31) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vaddi_du(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vaddi_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddi.du $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> %va, i32 31) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addw.ll -new file mode 100644 -index 000000000000..086e3bec12d2 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addw.ll -@@ -0,0 +1,290 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vaddwev_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwev_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwev.h.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vaddwev_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwev_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwev.w.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vaddwev_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwev_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwev.d.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vaddwev_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwev_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwev.q.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vaddwev_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind 
{ -+; CHECK-LABEL: lsx_vaddwev_h_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwev.h.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vaddwev_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwev_w_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwev.w.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vaddwev_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwev_d_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwev.d.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vaddwev_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwev_q_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwev.q.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vaddwev_h_bu_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwev_h_bu_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwev.h.bu.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vaddwev_w_hu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwev_w_hu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwev.w.hu.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vaddwev_d_wu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwev_d_wu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwev.d.wu.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vaddwev_q_du_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwev_q_du_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwev.q.du.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vaddwod_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwod_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwod.h.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> %va, <16 x i8> 
%vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vaddwod_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwod_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwod.w.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vaddwod_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwod_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwod.d.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vaddwod_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwod_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwod.q.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vaddwod_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwod_h_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwod.h.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vaddwod_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwod_w_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwod.w.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vaddwod_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwod_d_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwod.d.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vaddwod_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwod_q_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwod.q.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vaddwod_h_bu_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwod_h_bu_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwod.h.bu.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vaddwod_w_hu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwod_w_hu_h: -+; CHECK: # %bb.0: # %entry -+; 
CHECK-NEXT: vaddwod.w.hu.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vaddwod_d_wu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwod_d_wu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwod.d.wu.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vaddwod_q_du_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwod_q_du_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwod.q.du.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-and.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-and.ll -new file mode 100644 -index 000000000000..77496239c3a9 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-and.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vand_v(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vand_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi.ll -new file mode 100644 -index 000000000000..9a1c38a641d0 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vandi_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vandi_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vandi.b $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> %va, i32 1) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andn.ll -new file mode 100644 -index 000000000000..b08c759ecc32 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andn.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vandn_v(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vandn_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avg.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avg.ll -new file mode 100644 -index 000000000000..fb0861f4cd5e 
---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avg.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vavg_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vavg_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vavg.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vavg_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vavg_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vavg.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vavg_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vavg_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vavg.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vavg_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vavg_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vavg.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vavg_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vavg_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vavg.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vavg_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vavg_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vavg.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vavg_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vavg_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vavg.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vavg_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vavg_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vavg.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avgr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avgr.ll -new file mode 100644 -index 000000000000..8bf7d0ed8817 ---- /dev/null -+++ 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avgr.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vavgr_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vavgr_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vavgr.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vavgr_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vavgr_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vavgr.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vavgr_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vavgr_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vavgr.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vavgr_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vavgr_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vavgr.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vavgr_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vavgr_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vavgr.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vavgr_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vavgr_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vavgr.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vavgr_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vavgr_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vavgr.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vavgr_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vavgr_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vavgr.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr.ll -new file mode 100644 -index 000000000000..f5fba6dbb141 ---- /dev/null -+++ 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vbitclr_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vbitclr_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitclr.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vbitclr_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vbitclr_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitclr.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vbitclr_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vbitclr_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitclr.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vbitclr_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vbitclr_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitclr.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbitclri_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vbitclri_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitclri.b $vr0, $vr0, 7 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> %va, i32 7) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vbitclri_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vbitclri_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitclri.h $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> %va, i32 15) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vbitclri_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vbitclri_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitclri.w $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> %va, i32 31) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vbitclri_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vbitclri_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitclri.d $vr0, $vr0, 63 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> %va, i32 63) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev.ll -new file mode 100644 -index 000000000000..ad56e88fdb88 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev.ll -@@ -0,0 
+1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vbitrev_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vbitrev_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitrev.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vbitrev_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vbitrev_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitrev.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vbitrev_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vbitrev_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitrev.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vbitrev_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vbitrev_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitrev.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbitrevi_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vbitrevi_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitrevi.b $vr0, $vr0, 7 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> %va, i32 7) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vbitrevi_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vbitrevi_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitrevi.h $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> %va, i32 15) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vbitrevi_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vbitrevi_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitrevi.w $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> %va, i32 31) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vbitrevi_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vbitrevi_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitrevi.d $vr0, $vr0, 63 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> %va, i32 63) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitsel.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitsel.ll -new file mode 100644 -index 000000000000..4b4b5ff1fc8c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitsel.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8>, <16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vbitsel_v(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { -+; CHECK-LABEL: lsx_vbitsel_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitsel.v $vr0, $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli.ll -new file mode 100644 -index 000000000000..28d342b5c378 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbitseli_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vbitseli_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitseli.b $vr0, $vr1, 255 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> %va, <16 x i8> %vb, i32 255) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset.ll -new file mode 100644 -index 000000000000..75d98e6f8bce ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vbitset_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vbitset_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitset.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vbitset_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vbitset_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitset.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vbitset_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vbitset_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitset.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vbitset_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vbitset_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitset.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbitseti_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vbitseti_b: -+; 
CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitseti.b $vr0, $vr0, 7 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> %va, i32 7) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vbitseti_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vbitseti_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitseti.h $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> %va, i32 15) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vbitseti_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vbitseti_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitseti.w $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> %va, i32 31) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vbitseti_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vbitseti_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitseti.d $vr0, $vr0, 63 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> %va, i32 63) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll.ll -new file mode 100644 -index 000000000000..e7eb1cfcb407 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbsll_v(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vbsll_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbsll.v $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> %va, i32 31) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl.ll -new file mode 100644 -index 000000000000..fe0565297641 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbsrl_v(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vbsrl_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> %va, i32 31) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clo.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clo.ll -new file mode 100644 -index 000000000000..c581109f3fd0 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clo.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8>) -+ -+define <16 x i8> @lsx_vclo_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vclo_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vclo.b $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 
x i8> %va) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16>) -+ -+define <8 x i16> @lsx_vclo_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vclo_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vclo.h $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> %va) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32>) -+ -+define <4 x i32> @lsx_vclo_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vclo_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vclo.w $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> %va) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64>) -+ -+define <2 x i64> @lsx_vclo_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vclo_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vclo.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> %va) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clz.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clz.ll -new file mode 100644 -index 000000000000..25c37b64349b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clz.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8>) -+ -+define <16 x i8> @lsx_vclz_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vclz_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vclz.b $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> %va) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16>) -+ -+define <8 x i16> @lsx_vclz_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vclz_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vclz.h $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> %va) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32>) -+ -+define <4 x i32> @lsx_vclz_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vclz_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vclz.w $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> %va) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64>) -+ -+define <2 x i64> @lsx_vclz_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vclz_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vclz.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> %va) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-div.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-div.ll -new file mode 100644 -index 000000000000..53166e84d269 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-div.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vdiv_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vdiv_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vdiv.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> %va, <16 x i8> %vb) -+ 
ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vdiv_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vdiv_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vdiv.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vdiv_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vdiv_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vdiv.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vdiv_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vdiv_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vdiv.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vdiv_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vdiv_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vdiv.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vdiv_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vdiv_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vdiv.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vdiv_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vdiv_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vdiv.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vdiv_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vdiv_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vdiv.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-exth.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-exth.ll -new file mode 100644 -index 000000000000..2f3e891a9eef ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-exth.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8>) -+ -+define <8 x i16> @lsx_vexth_h_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vexth_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vexth.h.b $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> %va) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16>) -+ -+define 
<4 x i32> @lsx_vexth_w_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vexth_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vexth.w.h $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> %va) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32>) -+ -+define <2 x i64> @lsx_vexth_d_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vexth_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vexth.d.w $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> %va) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64>) -+ -+define <2 x i64> @lsx_vexth_q_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vexth_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vexth.q.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> %va) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8>) -+ -+define <8 x i16> @lsx_vexth_hu_bu(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vexth_hu_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vexth.hu.bu $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> %va) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16>) -+ -+define <4 x i32> @lsx_vexth_wu_hu(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vexth_wu_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vexth.wu.hu $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> %va) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32>) -+ -+define <2 x i64> @lsx_vexth_du_wu(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vexth_du_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vexth.du.wu $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> %va) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64>) -+ -+define <2 x i64> @lsx_vexth_qu_du(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vexth_qu_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vexth.qu.du $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> %va) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extl.ll -new file mode 100644 -index 000000000000..cbf19e2a3919 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extl.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64>) -+ -+define <2 x i64> @lsx_vextl_q_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vextl_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vextl.q.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> %va) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64>) -+ -+define <2 x i64> @lsx_vextl_qu_du(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vextl_qu_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vextl.qu.du $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> %va) -+ ret <2 x i64> %res -+} -diff --git 
a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins.ll -new file mode 100644 -index 000000000000..8f03a2b81291 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vextrins_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vextrins_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vextrins.b $vr0, $vr1, 255 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> %va, <16 x i8> %vb, i32 255) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vextrins_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vextrins_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vextrins.h $vr0, $vr1, 255 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> %va, <8 x i16> %vb, i32 255) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vextrins_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vextrins_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vextrins.w $vr0, $vr1, 255 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> %va, <4 x i32> %vb, i32 255) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vextrins_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vextrins_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vextrins.d $vr0, $vr1, 255 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> %va, <2 x i64> %vb, i32 255) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fadd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fadd.ll -new file mode 100644 -index 000000000000..569002314c92 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fadd.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float>, <4 x float>) -+ -+define <4 x float> @lsx_vfadd_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfadd_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfadd.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double>, <2 x double>) -+ -+define <2 x double> @lsx_vfadd_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfadd_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfadd.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fclass.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fclass.ll -new file mode 100644 -index 000000000000..0c6682187101 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fclass.ll -@@ 
-0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float>) -+ -+define <4 x i32> @lsx_vfclass_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vfclass_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfclass.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> %va) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double>) -+ -+define <2 x i64> @lsx_vfclass_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vfclass_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfclass.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> %va) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcmp.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcmp.ll -new file mode 100644 -index 000000000000..669c53b73b16 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcmp.ll -@@ -0,0 +1,530 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_caf_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_caf_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.caf.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_caf_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_caf_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.caf.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_cun_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_cun_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.cun.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_cun_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_cun_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.cun.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_ceq_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_ceq_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.ceq.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_ceq_d(<2 x double> %va, <2 x double> %vb) 
nounwind { -+; CHECK-LABEL: lsx_vfcmp_ceq_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.ceq.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_cueq_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_cueq_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.cueq.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_cueq_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_cueq_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.cueq.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_clt_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_clt_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.clt.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_clt_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_clt_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.clt.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_cult_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_cult_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.cult.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_cult_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_cult_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.cult.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_cle_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_cle_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.cle.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_cle_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_cle_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.cle.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = 
call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_cule_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_cule_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.cule.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_cule_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_cule_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.cule.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_cne_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_cne_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.cne.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_cne_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_cne_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.cne.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_cor_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_cor_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.cor.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_cor_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_cor_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.cor.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_cune_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_cune_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.cune.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_cune_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_cune_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.cune.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> 
@llvm.loongarch.lsx.vfcmp.saf.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_saf_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_saf_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.saf.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_saf_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_saf_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.saf.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_sun_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_sun_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.sun.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_sun_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_sun_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.sun.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_seq_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_seq_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.seq.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_seq_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_seq_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.seq.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_sueq_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_sueq_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.sueq.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_sueq_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_sueq_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.sueq.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_slt_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: 
lsx_vfcmp_slt_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.slt.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_slt_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_slt_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.slt.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_sult_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_sult_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.sult.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_sult_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_sult_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.sult.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_sle_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_sle_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.sle.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_sle_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_sle_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.sle.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_sule_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_sule_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.sule.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_sule_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_sule_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.sule.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_sne_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_sne_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.sne.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> 
@llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_sne_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_sne_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.sne.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_sor_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_sor_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.sor.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_sor_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_sor_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.sor.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_sune_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_sune_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.sune.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_sune_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_sune_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.sune.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvt.ll -new file mode 100644 -index 000000000000..a6a151a96d84 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvt.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float>, <4 x float>) -+ -+define <8 x i16> @lsx_vfcvt_h_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcvt_h_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcvt.h.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> %va, <4 x float> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double>, <2 x double>) -+ -+define <4 x float> @lsx_vfcvt_s_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcvt_s_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcvt.s.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> %va, <2 x double> %vb) -+ ret <4 x float> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvth.ll 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvth.ll -new file mode 100644 -index 000000000000..a9e4328bd011 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvth.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16>) -+ -+define <4 x float> @lsx_vfcvth_s_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vfcvth_s_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcvth.s.h $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> %va) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float>) -+ -+define <2 x double> @lsx_vfcvth_d_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vfcvth_d_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcvth.d.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> %va) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvtl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvtl.ll -new file mode 100644 -index 000000000000..9a69964bb227 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvtl.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16>) -+ -+define <4 x float> @lsx_vfcvtl_s_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vfcvtl_s_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcvtl.s.h $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> %va) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float>) -+ -+define <2 x double> @lsx_vfcvtl_d_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vfcvtl_d_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcvtl.d.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> %va) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fdiv.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fdiv.ll -new file mode 100644 -index 000000000000..1ca8e5e2c0e9 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fdiv.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float>, <4 x float>) -+ -+define <4 x float> @lsx_vfdiv_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfdiv_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfdiv.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double>, <2 x double>) -+ -+define <2 x double> @lsx_vfdiv_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfdiv_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfdiv.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ffint.ll 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ffint.ll -new file mode 100644 -index 000000000000..62fbcfa339cd ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ffint.ll -@@ -0,0 +1,86 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32>) -+ -+define <4 x float> @lsx_vffint_s_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vffint_s_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vffint.s.w $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> %va) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64>) -+ -+define <2 x double> @lsx_vffint_d_l(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vffint_d_l: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vffint.d.l $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> %va) -+ ret <2 x double> %res -+} -+ -+declare <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32>) -+ -+define <4 x float> @lsx_vffint_s_wu(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vffint_s_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vffint.s.wu $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> %va) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64>) -+ -+define <2 x double> @lsx_vffint_d_lu(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vffint_d_lu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vffint.d.lu $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> %va) -+ ret <2 x double> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32>) -+ -+define <2 x double> @lsx_vffintl_d_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vffintl_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vffintl.d.w $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> %va) -+ ret <2 x double> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32>) -+ -+define <2 x double> @lsx_vffinth_d_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vffinth_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vffinth.d.w $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> %va) -+ ret <2 x double> %res -+} -+ -+declare <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64>, <2 x i64>) -+ -+define <4 x float> @lsx_vffint_s_l(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vffint_s_l: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vffint.s.l $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> %va, <2 x i64> %vb) -+ ret <4 x float> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-flogb.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-flogb.ll -new file mode 100644 -index 000000000000..d8382acc70ed ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-flogb.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float>) -+ -+define <4 x float> @lsx_vflogb_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vflogb_s: -+; 
CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vflogb.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> %va) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double>) -+ -+define <2 x double> @lsx_vflogb_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vflogb_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vflogb.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> %va) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmadd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmadd.ll -new file mode 100644 -index 000000000000..adbaf6c76b1b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmadd.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float>, <4 x float>, <4 x float>) -+ -+define <4 x float> @lsx_vfmadd_s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) nounwind { -+; CHECK-LABEL: lsx_vfmadd_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfmadd.s $vr0, $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double>, <2 x double>, <2 x double>) -+ -+define <2 x double> @lsx_vfmadd_d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) nounwind { -+; CHECK-LABEL: lsx_vfmadd_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfmadd.d $vr0, $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmax.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmax.ll -new file mode 100644 -index 000000000000..89f757c4e456 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmax.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float>, <4 x float>) -+ -+define <4 x float> @lsx_vfmax_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfmax_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfmax.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double>, <2 x double>) -+ -+define <2 x double> @lsx_vfmax_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfmax_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfmax.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmaxa.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmaxa.ll -new file mode 100644 -index 000000000000..5662acc0b9a1 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmaxa.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | 
FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float>, <4 x float>) -+ -+define <4 x float> @lsx_vfmaxa_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfmaxa_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfmaxa.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double>, <2 x double>) -+ -+define <2 x double> @lsx_vfmaxa_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfmaxa_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfmaxa.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmin.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmin.ll -new file mode 100644 -index 000000000000..0f844240277f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmin.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float>, <4 x float>) -+ -+define <4 x float> @lsx_vfmin_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfmin_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfmin.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double>, <2 x double>) -+ -+define <2 x double> @lsx_vfmin_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfmin_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfmin.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmina.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmina.ll -new file mode 100644 -index 000000000000..27f70b5fba32 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmina.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float>, <4 x float>) -+ -+define <4 x float> @lsx_vfmina_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfmina_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfmina.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double>, <2 x double>) -+ -+define <2 x double> @lsx_vfmina_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfmina_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfmina.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmsub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmsub.ll -new file mode 100644 -index 000000000000..856ca9cadbd9 ---- /dev/null -+++ 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmsub.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float>, <4 x float>, <4 x float>) -+ -+define <4 x float> @lsx_vfmsub_s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) nounwind { -+; CHECK-LABEL: lsx_vfmsub_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfmsub.s $vr0, $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double>, <2 x double>, <2 x double>) -+ -+define <2 x double> @lsx_vfmsub_d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) nounwind { -+; CHECK-LABEL: lsx_vfmsub_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfmsub.d $vr0, $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmul.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmul.ll -new file mode 100644 -index 000000000000..1e6c4c77d536 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmul.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float>, <4 x float>) -+ -+define <4 x float> @lsx_vfmul_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfmul_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfmul.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double>, <2 x double>) -+ -+define <2 x double> @lsx_vfmul_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfmul_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfmul.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmadd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmadd.ll -new file mode 100644 -index 000000000000..e1a9ea78ef9d ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmadd.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float>, <4 x float>, <4 x float>) -+ -+define <4 x float> @lsx_vfnmadd_s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) nounwind { -+; CHECK-LABEL: lsx_vfnmadd_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfnmadd.s $vr0, $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double>, <2 x double>, <2 x double>) -+ -+define <2 x double> @lsx_vfnmadd_d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) nounwind { -+; CHECK-LABEL: lsx_vfnmadd_d: -+; CHECK: # %bb.0: # %entry -+; 
CHECK-NEXT: vfnmadd.d $vr0, $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmsub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmsub.ll -new file mode 100644 -index 000000000000..46db0f4a5061 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmsub.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float>, <4 x float>, <4 x float>) -+ -+define <4 x float> @lsx_vfnmsub_s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) nounwind { -+; CHECK-LABEL: lsx_vfnmsub_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfnmsub.s $vr0, $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double>, <2 x double>, <2 x double>) -+ -+define <2 x double> @lsx_vfnmsub_d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) nounwind { -+; CHECK-LABEL: lsx_vfnmsub_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfnmsub.d $vr0, $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecip.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecip.ll -new file mode 100644 -index 000000000000..669fde5912d4 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecip.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float>) -+ -+define <4 x float> @lsx_vfrecip_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vfrecip_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrecip.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> %va) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double>) -+ -+define <2 x double> @lsx_vfrecip_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vfrecip_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrecip.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> %va) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frint.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frint.ll -new file mode 100644 -index 000000000000..8d872fc72962 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frint.ll -@@ -0,0 +1,122 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float>) -+ -+define <4 x float> @lsx_vfrintrne_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vfrintrne_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrintrne.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> %va) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> 
@llvm.loongarch.lsx.vfrintrne.d(<2 x double>) -+ -+define <2 x double> @lsx_vfrintrne_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vfrintrne_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrintrne.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> %va) -+ ret <2 x double> %res -+} -+ -+declare <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float>) -+ -+define <4 x float> @lsx_vfrintrz_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vfrintrz_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrintrz.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> %va) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double>) -+ -+define <2 x double> @lsx_vfrintrz_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vfrintrz_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrintrz.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> %va) -+ ret <2 x double> %res -+} -+ -+declare <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float>) -+ -+define <4 x float> @lsx_vfrintrp_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vfrintrp_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrintrp.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> %va) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double>) -+ -+define <2 x double> @lsx_vfrintrp_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vfrintrp_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrintrp.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> %va) -+ ret <2 x double> %res -+} -+ -+declare <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float>) -+ -+define <4 x float> @lsx_vfrintrm_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vfrintrm_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrintrm.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> %va) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double>) -+ -+define <2 x double> @lsx_vfrintrm_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vfrintrm_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrintrm.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> %va) -+ ret <2 x double> %res -+} -+ -+declare <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float>) -+ -+define <4 x float> @lsx_vfrint_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vfrint_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrint.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> %va) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double>) -+ -+define <2 x double> @lsx_vfrint_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vfrint_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrint.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> %va) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrt.ll -new file mode 100644 -index 000000000000..326d87308b0b ---- /dev/null -+++ 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrt.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float>) -+ -+define <4 x float> @lsx_vfrsqrt_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vfrsqrt_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrsqrt.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> %va) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double>) -+ -+define <2 x double> @lsx_vfrsqrt_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vfrsqrt_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrsqrt.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> %va) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp.ll -new file mode 100644 -index 000000000000..5c072b194d4f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8>, <16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vfrstp_b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { -+; CHECK-LABEL: lsx_vfrstp_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrstp.b $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16>, <8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vfrstp_h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { -+; CHECK-LABEL: lsx_vfrstp_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrstp.h $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) -+ ret <8 x i16> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vfrstpi_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vfrstpi_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrstpi.b $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> %va, <16 x i8> %vb, i32 1) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vfrstpi_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vfrstpi_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrstpi.h $vr0, $vr1, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> %va, <8 x i16> %vb, i32 31) -+ ret <8 x i16> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsqrt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsqrt.ll -new file mode 100644 -index 000000000000..55bffba9e99e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsqrt.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float>) -+ -+define <4 x float> 
@lsx_vfsqrt_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vfsqrt_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfsqrt.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> %va) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double>) -+ -+define <2 x double> @lsx_vfsqrt_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vfsqrt_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfsqrt.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> %va) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsub.ll -new file mode 100644 -index 000000000000..2beba4a70dc9 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsub.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float>, <4 x float>) -+ -+define <4 x float> @lsx_vfsub_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfsub_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfsub.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double>, <2 x double>) -+ -+define <2 x double> @lsx_vfsub_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfsub_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfsub.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ftint.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ftint.ll -new file mode 100644 -index 000000000000..2a494cd7fa87 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ftint.ll -@@ -0,0 +1,350 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float>) -+ -+define <4 x i32> @lsx_vftintrne_w_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrne_w_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrne.w.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> %va) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double>) -+ -+define <2 x i64> @lsx_vftintrne_l_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrne_l_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrne.l.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> %va) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float>) -+ -+define <4 x i32> @lsx_vftintrz_w_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrz_w_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrz.w.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> %va) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double>) -+ -+define <2 x i64> @lsx_vftintrz_l_d(<2 x double> %va) nounwind { -+; 
CHECK-LABEL: lsx_vftintrz_l_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrz.l.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> %va) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float>) -+ -+define <4 x i32> @lsx_vftintrp_w_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrp_w_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrp.w.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> %va) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double>) -+ -+define <2 x i64> @lsx_vftintrp_l_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrp_l_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrp.l.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> %va) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float>) -+ -+define <4 x i32> @lsx_vftintrm_w_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrm_w_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrm.w.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> %va) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double>) -+ -+define <2 x i64> @lsx_vftintrm_l_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrm_l_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrm.l.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> %va) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float>) -+ -+define <4 x i32> @lsx_vftint_w_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vftint_w_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftint.w.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> %va) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double>) -+ -+define <2 x i64> @lsx_vftint_l_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vftint_l_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftint.l.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> %va) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float>) -+ -+define <4 x i32> @lsx_vftintrz_wu_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrz_wu_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrz.wu.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> %va) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double>) -+ -+define <2 x i64> @lsx_vftintrz_lu_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrz_lu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrz.lu.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> %va) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float>) -+ -+define <4 x i32> @lsx_vftint_wu_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vftint_wu_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftint.wu.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> %va) -+ ret <4 x 
i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double>) -+ -+define <2 x i64> @lsx_vftint_lu_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vftint_lu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftint.lu.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> %va) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double>, <2 x double>) -+ -+define <4 x i32> @lsx_vftintrne_w_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vftintrne_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrne.w.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> %va, <2 x double> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double>, <2 x double>) -+ -+define <4 x i32> @lsx_vftintrz_w_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vftintrz_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrz.w.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> %va, <2 x double> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double>, <2 x double>) -+ -+define <4 x i32> @lsx_vftintrp_w_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vftintrp_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrp.w.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> %va, <2 x double> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double>, <2 x double>) -+ -+define <4 x i32> @lsx_vftintrm_w_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vftintrm_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrm.w.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> %va, <2 x double> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double>, <2 x double>) -+ -+define <4 x i32> @lsx_vftint_w_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vftint_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftint.w.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> %va, <2 x double> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float>) -+ -+define <2 x i64> @lsx_vftintrnel_l_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrnel_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrnel.l.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> %va) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float>) -+ -+define <2 x i64> @lsx_vftintrneh_l_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrneh_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrneh.l.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> %va) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float>) -+ -+define <2 x i64> @lsx_vftintrzl_l_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrzl_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrzl.l.s $vr0, $vr0 -+; CHECK-NEXT: ret 
-+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> %va) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float>) -+ -+define <2 x i64> @lsx_vftintrzh_l_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrzh_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrzh.l.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> %va) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float>) -+ -+define <2 x i64> @lsx_vftintrpl_l_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrpl_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrpl.l.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> %va) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float>) -+ -+define <2 x i64> @lsx_vftintrph_l_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrph_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrph.l.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> %va) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float>) -+ -+define <2 x i64> @lsx_vftintrml_l_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrml_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrml.l.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> %va) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float>) -+ -+define <2 x i64> @lsx_vftintrmh_l_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrmh_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrmh.l.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> %va) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float>) -+ -+define <2 x i64> @lsx_vftintl_l_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vftintl_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintl.l.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> %va) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float>) -+ -+define <2 x i64> @lsx_vftinth_l_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vftinth_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftinth.l.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> %va) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-haddw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-haddw.ll -new file mode 100644 -index 000000000000..05725582334a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-haddw.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vhaddw_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vhaddw_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vhaddw.h.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 
x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vhaddw_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vhaddw_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vhaddw.w.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vhaddw_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vhaddw_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vhaddw.d.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vhaddw_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vhaddw_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vhaddw.q.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vhaddw_hu_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vhaddw_hu_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vhaddw.hu.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vhaddw_wu_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vhaddw_wu_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vhaddw.wu.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vhaddw_du_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vhaddw_du_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vhaddw.du.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vhaddw_qu_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vhaddw_qu_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vhaddw.qu.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-hsubw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-hsubw.ll -new file mode 100644 -index 000000000000..dd5815b2ea85 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-hsubw.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vhsubw_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vhsubw_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vhsubw.h.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = 
call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vhsubw_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vhsubw_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vhsubw.w.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vhsubw_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vhsubw_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vhsubw.d.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vhsubw_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vhsubw_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vhsubw.q.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vhsubw_hu_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vhsubw_hu_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vhsubw.hu.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vhsubw_wu_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vhsubw_wu_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vhsubw.wu.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vhsubw_du_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vhsubw_du_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vhsubw.du.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vhsubw_qu_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vhsubw_qu_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vhsubw.qu.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ilv.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ilv.ll -new file mode 100644 -index 000000000000..77b0b3484df8 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ilv.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vilvl_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vilvl_b: -+; CHECK: # 
%bb.0: # %entry -+; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vilvl_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vilvl_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vilvl_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vilvl_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vilvl_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vilvl_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vilvl.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vilvh_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vilvh_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vilvh.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vilvh_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vilvh_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vilvh.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vilvh_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vilvh_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vilvh.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vilvh_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vilvh_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vilvh.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr.ll -new file mode 100644 -index 000000000000..61d2cbd28066 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr.ll -@@ -0,0 +1,54 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8>, i32, i32) -+ -+define <16 x i8> @lsx_vinsgr2vr_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vinsgr2vr_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: ori $a0, 
$zero, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> %va, i32 1, i32 15) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16>, i32, i32) -+ -+define <8 x i16> @lsx_vinsgr2vr_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vinsgr2vr_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: ori $a0, $zero, 1 -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 7 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> %va, i32 1, i32 7) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32>, i32, i32) -+ -+define <4 x i32> @lsx_vinsgr2vr_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vinsgr2vr_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: ori $a0, $zero, 1 -+; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 3 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> %va, i32 1, i32 3) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64>, i64, i32) -+ -+define <2 x i64> @lsx_vinsgr2vr_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vinsgr2vr_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: ori $a0, $zero, 1 -+; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> %va, i64 1, i32 1) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld.ll -new file mode 100644 -index 000000000000..b9e2ff8088d8 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vld(i8*, i32) -+ -+define <16 x i8> @lsx_vld(i8* %p) nounwind { -+; CHECK-LABEL: lsx_vld: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vld(i8* %p, i32 1) -+ ret <16 x i8> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vldx(i8*, i64) -+ -+define <16 x i8> @lsx_vldx(i8* %p, i64 %b) nounwind { -+; CHECK-LABEL: lsx_vldx: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vldx $vr0, $a0, $a1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vldx(i8* %p, i64 %b) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi.ll -new file mode 100644 -index 000000000000..ace910b54d9a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi.ll -@@ -0,0 +1,62 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <2 x i64> @llvm.loongarch.lsx.vldi(i32) -+ -+define <2 x i64> @lsx_vldi() nounwind { -+; CHECK-LABEL: lsx_vldi: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vldi $vr0, 4095 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vldi(i32 4095) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32) -+ -+define <16 x i8> @lsx_vrepli_b() nounwind { -+; CHECK-LABEL: lsx_vrepli_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vrepli.b $vr0, 511 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 511) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> 
@llvm.loongarch.lsx.vrepli.h(i32) -+ -+define <8 x i16> @lsx_vrepli_h() nounwind { -+; CHECK-LABEL: lsx_vrepli_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vrepli.h $vr0, 511 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 511) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32) -+ -+define <4 x i32> @lsx_vrepli_w() nounwind { -+; CHECK-LABEL: lsx_vrepli_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vrepli.w $vr0, 511 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 511) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32) -+ -+define <2 x i64> @lsx_vrepli_d() nounwind { -+; CHECK-LABEL: lsx_vrepli_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vrepli.d $vr0, 511 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 511) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl.ll -new file mode 100644 -index 000000000000..1a9cf3d3a766 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8*, i32) -+ -+define <16 x i8> @lsx_vldrepl_b(i8* %p, i32 %b) nounwind { -+; CHECK-LABEL: lsx_vldrepl_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vldrepl.b $vr0, $a0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8* %p, i32 1) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8*, i32) -+ -+define <8 x i16> @lsx_vldrepl_h(i8* %p, i32 %b) nounwind { -+; CHECK-LABEL: lsx_vldrepl_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vldrepl.h $vr0, $a0, 2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8* %p, i32 2) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8*, i32) -+ -+define <4 x i32> @lsx_vldrepl_w(i8* %p, i32 %b) nounwind { -+; CHECK-LABEL: lsx_vldrepl_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vldrepl.w $vr0, $a0, 4 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8* %p, i32 4) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8*, i32) -+ -+define <2 x i64> @lsx_vldrepl_d(i8* %p, i32 %b) nounwind { -+; CHECK-LABEL: lsx_vldrepl_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vldrepl.d $vr0, $a0, 8 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8* %p, i32 8) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-madd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-madd.ll -new file mode 100644 -index 000000000000..89503724fd73 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-madd.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8>, <16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vmadd_b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { -+; CHECK-LABEL: lsx_vmadd_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmadd.b $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> %va, <16 x i8> %vb, 
<16 x i8> %vc) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16>, <8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vmadd_h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { -+; CHECK-LABEL: lsx_vmadd_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmadd.h $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32>, <4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vmadd_w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { -+; CHECK-LABEL: lsx_vmadd_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmadd.w $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64>, <2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmadd_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { -+; CHECK-LABEL: lsx_vmadd_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmadd.d $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-maddw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-maddw.ll -new file mode 100644 -index 000000000000..1e3ab25a5fcf ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-maddw.ll -@@ -0,0 +1,290 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16>, <16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vmaddwev_h_b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwev_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwev.h.b $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32>, <8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vmaddwev_w_h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwev_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwev.w.h $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64>, <4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vmaddwev_d_w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwev_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwev.d.w $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64>, <2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmaddwev_q_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwev_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwev.q.d $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) -+ ret <2 x i64> %res -+} -+ 
-+declare <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16>, <16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vmaddwev_h_bu(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwev_h_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwev.h.bu $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32>, <8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vmaddwev_w_hu(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwev_w_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwev.w.hu $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64>, <4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vmaddwev_d_wu(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwev_d_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwev.d.wu $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64>, <2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmaddwev_q_du(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwev_q_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwev.q.du $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16>, <16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vmaddwev_h_bu_b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwev_h_bu_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwev.h.bu.b $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32>, <8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vmaddwev_w_hu_h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwev_w_hu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwev.w.hu.h $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64>, <4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vmaddwev_d_wu_w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwev_d_wu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwev.d.wu.w $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64>, <2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmaddwev_q_du_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwev_q_du_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwev.q.du.d $vr0, $vr1, $vr2 -+; 
CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16>, <16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vmaddwod_h_b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwod_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwod.h.b $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32>, <8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vmaddwod_w_h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwod_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwod.w.h $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64>, <4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vmaddwod_d_w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwod_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwod.d.w $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64>, <2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmaddwod_q_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwod_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwod.q.d $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16>, <16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vmaddwod_h_bu(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwod_h_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwod.h.bu $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32>, <8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vmaddwod_w_hu(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwod_w_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwod.w.hu $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64>, <4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vmaddwod_d_wu(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwod_d_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwod.d.wu $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64>, <2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmaddwod_q_du(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { -+; CHECK-LABEL: 
lsx_vmaddwod_q_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwod.q.du $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16>, <16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vmaddwod_h_bu_b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwod_h_bu_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwod.h.bu.b $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32>, <8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vmaddwod_w_hu_h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwod_w_hu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwod.w.hu.h $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64>, <4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vmaddwod_d_wu_w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwod_d_wu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwod.d.wu.w $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64>, <2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmaddwod_q_du_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwod_q_du_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwod.q.du.d $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max.ll -new file mode 100644 -index 000000000000..4dd289cf6ed7 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max.ll -@@ -0,0 +1,194 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vmax_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vmax_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vmax_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vmax_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmax.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vmax_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vmax_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmax.w $vr0, $vr0, $vr1 -+; 
CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmax_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vmax_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmax.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vmaxi_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vmaxi_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaxi.b $vr0, $vr0, -16 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> %va, i32 -16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vmaxi_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vmaxi_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaxi.h $vr0, $vr0, -16 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> %va, i32 -16) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vmaxi_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vmaxi_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaxi.w $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> %va, i32 15) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vmaxi_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vmaxi_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaxi.d $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> %va, i32 15) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vmax_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vmax_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vmax_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vmax_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmax.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vmax_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vmax_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmax.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmax_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vmax_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmax.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x 
i8>, i32) -+ -+define <16 x i8> @lsx_vmaxi_bu(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vmaxi_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaxi.bu $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> %va, i32 1) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vmaxi_hu(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vmaxi_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaxi.hu $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> %va, i32 1) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vmaxi_wu(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vmaxi_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaxi.wu $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> %va, i32 31) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vmaxi_du(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vmaxi_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaxi.du $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> %va, i32 31) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min.ll -new file mode 100644 -index 000000000000..aa12a5ead6a3 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min.ll -@@ -0,0 +1,194 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vmin_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vmin_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vmin_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vmin_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmin.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vmin_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vmin_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmin.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmin_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vmin_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmin.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vmini_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vmini_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmini.b $vr0, $vr0, 15 -+; 
CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> %va, i32 15) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vmini_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vmini_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmini.h $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> %va, i32 15) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vmini_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vmini_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmini.w $vr0, $vr0, -16 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> %va, i32 -16) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vmini_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vmini_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmini.d $vr0, $vr0, -16 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> %va, i32 -16) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vmin_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vmin_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmin.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vmin_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vmin_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmin.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vmin_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vmin_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmin.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmin_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vmin_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmin.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vmini_bu(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vmini_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmini.bu $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> %va, i32 31) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vmini_hu(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vmini_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmini.hu $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> %va, i32 31) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32>, i32) -+ -+define 
<4 x i32> @lsx_vmini_wu(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vmini_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmini.wu $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> %va, i32 31) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vmini_du(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vmini_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmini.du $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> %va, i32 31) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mod.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mod.ll -new file mode 100644 -index 000000000000..6b3dc6865584 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mod.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vmod_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vmod_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmod.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vmod_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vmod_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmod.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vmod_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vmod_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmod.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmod_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vmod_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmod.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vmod_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vmod_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmod.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vmod_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vmod_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmod.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vmod_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vmod_wu: -+; CHECK: # %bb.0: # 
%entry -+; CHECK-NEXT: vmod.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmod_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vmod_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmod.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskgez.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskgez.ll -new file mode 100644 -index 000000000000..3ecd777aee67 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskgez.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8>) -+ -+define <16 x i8> @lsx_vmskgez_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vmskgez_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmskgez.b $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> %va) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskltz.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskltz.ll -new file mode 100644 -index 000000000000..be00c76137c7 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskltz.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8>) -+ -+define <16 x i8> @lsx_vmskltz_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vmskltz_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmskltz.b $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> %va) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16>) -+ -+define <8 x i16> @lsx_vmskltz_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vmskltz_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmskltz.h $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> %va) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32>) -+ -+define <4 x i32> @lsx_vmskltz_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vmskltz_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmskltz.w $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> %va) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64>) -+ -+define <2 x i64> @lsx_vmskltz_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vmskltz_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmskltz.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> %va) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msknz.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msknz.ll -new file mode 100644 -index 000000000000..02f1752f7190 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msknz.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | 
FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8>) -+ -+define <16 x i8> @lsx_vmsknz_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vmsknz_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmsknz.b $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> %va) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msub.ll -new file mode 100644 -index 000000000000..98684e10c78e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msub.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8>, <16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vmsub_b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { -+; CHECK-LABEL: lsx_vmsub_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmsub.b $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16>, <8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vmsub_h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { -+; CHECK-LABEL: lsx_vmsub_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmsub.h $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32>, <4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vmsub_w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { -+; CHECK-LABEL: lsx_vmsub_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmsub.w $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64>, <2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmsub_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { -+; CHECK-LABEL: lsx_vmsub_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmsub.d $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-muh.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-muh.ll -new file mode 100644 -index 000000000000..a4deb8f8f823 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-muh.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vmuh_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vmuh_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmuh.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vmuh_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vmuh_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmuh.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = 
call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vmuh_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vmuh_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmuh.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmuh_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vmuh_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmuh.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vmuh_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vmuh_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmuh.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vmuh_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vmuh_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmuh.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vmuh_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vmuh_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmuh.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmuh_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vmuh_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmuh.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mul.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mul.ll -new file mode 100644 -index 000000000000..aca60d1663b7 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mul.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vmul_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vmul_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmul.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vmul_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vmul_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmul.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> %va, <8 x i16> %vb) 
-+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vmul_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vmul_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmul.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmul_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vmul_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmul.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mulw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mulw.ll -new file mode 100644 -index 000000000000..eb55c1f809e3 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mulw.ll -@@ -0,0 +1,290 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vmulwev_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwev_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwev.h.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vmulwev_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwev_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwev.w.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vmulwev_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwev_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwev.d.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmulwev_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwev_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwev.q.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vmulwev_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwev_h_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwev.h.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vmulwev_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwev_w_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwev.w.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call 
<4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vmulwev_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwev_d_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwev.d.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmulwev_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwev_q_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwev.q.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vmulwev_h_bu_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwev_h_bu_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwev.h.bu.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vmulwev_w_hu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwev_w_hu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwev.w.hu.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vmulwev_d_wu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwev_d_wu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwev.d.wu.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmulwev_q_du_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwev_q_du_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwev.q.du.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vmulwod_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwod_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwod.h.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vmulwod_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwod_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwod.w.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vmulwod_d_w(<4 x i32> 
%va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwod_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwod.d.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmulwod_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwod_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwod.q.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vmulwod_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwod_h_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwod.h.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vmulwod_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwod_w_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwod.w.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vmulwod_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwod_d_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwod.d.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmulwod_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwod_q_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwod.q.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vmulwod_h_bu_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwod_h_bu_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwod.h.bu.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vmulwod_w_hu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwod_w_hu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwod.w.hu.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vmulwod_d_wu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwod_d_wu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwod.d.wu.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x 
i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmulwod_q_du_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwod_q_du_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwod.q.du.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-neg.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-neg.ll -new file mode 100644 -index 000000000000..43c6e9757614 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-neg.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8>) -+ -+define <16 x i8> @lsx_vneg_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vneg_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vneg.b $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> %va) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16>) -+ -+define <8 x i16> @lsx_vneg_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vneg_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vneg.h $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> %va) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32>) -+ -+define <4 x i32> @lsx_vneg_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vneg_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vneg.w $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> %va) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64>) -+ -+define <2 x i64> @lsx_vneg_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vneg_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vneg.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> %va) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nor.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nor.ll -new file mode 100644 -index 000000000000..16619225f2d1 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nor.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vnor_v(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vnor_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vnor.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori.ll -new file mode 100644 -index 000000000000..c2388a1e0da3 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vnori_b(<16 x i8> %va) nounwind { -+; 
CHECK-LABEL: lsx_vnori_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vnori.b $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> %va, i32 1) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-or.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-or.ll -new file mode 100644 -index 000000000000..ab557003d150 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-or.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vor_v(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vor_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori.ll -new file mode 100644 -index 000000000000..85c0f432c54a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vori_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vori_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vori.b $vr0, $vr0, 3 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> %va, i32 3) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-orn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-orn.ll -new file mode 100644 -index 000000000000..4528628e02c3 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-orn.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vorn_v(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vorn_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vorn.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pack.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pack.ll -new file mode 100644 -index 000000000000..70a3620d1757 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pack.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vpackev_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vpackev_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpackev.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vpackev_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vpackev_h: -+; CHECK: # 
%bb.0: # %entry -+; CHECK-NEXT: vpackev.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vpackev_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vpackev_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpackev.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vpackev_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vpackev_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpackev.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vpackod_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vpackod_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpackod.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vpackod_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vpackod_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpackod.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vpackod_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vpackod_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpackod.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vpackod_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vpackod_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpackod.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pcnt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pcnt.ll -new file mode 100644 -index 000000000000..431b270ab0a1 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pcnt.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8>) -+ -+define <16 x i8> @lsx_vpcnt_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vpcnt_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpcnt.b $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> %va) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16>) -+ -+define <8 x i16> @lsx_vpcnt_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vpcnt_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpcnt.h $vr0, $vr0 -+; 
CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> %va) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32>) -+ -+define <4 x i32> @lsx_vpcnt_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vpcnt_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpcnt.w $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> %va) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64>) -+ -+define <2 x i64> @lsx_vpcnt_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vpcnt_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpcnt.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> %va) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi.ll -new file mode 100644 -index 000000000000..b8367d98caf6 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vpermi_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vpermi_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpermi.w $vr0, $vr1, 255 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> %va, <4 x i32> %vb, i32 255) -+ ret <4 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pick.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pick.ll -new file mode 100644 -index 000000000000..4ebf29e1409c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pick.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vpickev_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vpickev_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpickev.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vpickev_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vpickev_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpickev.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vpickev_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vpickev_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpickev.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vpickev_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vpickev_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpickev.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> 
@llvm.loongarch.lsx.vpickev.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vpickod_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vpickod_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpickod.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vpickod_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vpickod_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpickod.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vpickod_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vpickod_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpickod.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vpickod_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vpickod_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpickod.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr.ll -new file mode 100644 -index 000000000000..ed56d30ce3c4 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8>, i32) -+ -+define i32 @lsx_vpickve2gr_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vpickve2gr_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> %va, i32 15) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16>, i32) -+ -+define i32 @lsx_vpickve2gr_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vpickve2gr_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> %va, i32 7) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32>, i32) -+ -+define i32 @lsx_vpickve2gr_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vpickve2gr_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 3 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> %va, i32 3) -+ ret i32 %res -+} -+ -+declare i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64>, i32) -+ -+define i64 @lsx_vpickve2gr_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vpickve2gr_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> %va, i32 1) -+ ret i64 %res -+} -+ -+declare i32 
@llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8>, i32) -+ -+define i32 @lsx_vpickve2gr_bu(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vpickve2gr_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpickve2gr.bu $a0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> %va, i32 15) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16>, i32) -+ -+define i32 @lsx_vpickve2gr_hu(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vpickve2gr_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 7 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> %va, i32 7) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32>, i32) -+ -+define i32 @lsx_vpickve2gr_wu(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vpickve2gr_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpickve2gr.wu $a0, $vr0, 3 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> %va, i32 3) -+ ret i32 %res -+} -+ -+declare i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64>, i32) -+ -+define i64 @lsx_vpickve2gr_du(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vpickve2gr_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpickve2gr.du $a0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> %va, i32 1) -+ ret i64 %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replgr2vr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replgr2vr.ll -new file mode 100644 -index 000000000000..091f1c98c228 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replgr2vr.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32) -+ -+define <16 x i8> @lsx_vreplgr2vr_b(i32 %a) nounwind { -+; CHECK-LABEL: lsx_vreplgr2vr_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vreplgr2vr.b $vr0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 %a) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32) -+ -+define <8 x i16> @lsx_vreplgr2vr_h(i32 %a) nounwind { -+; CHECK-LABEL: lsx_vreplgr2vr_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vreplgr2vr.h $vr0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 %a) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32) -+ -+define <4 x i32> @lsx_vreplgr2vr_w(i32 %a) nounwind { -+; CHECK-LABEL: lsx_vreplgr2vr_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vreplgr2vr.w $vr0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 %a) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64) -+ -+define <2 x i64> @lsx_vreplgr2vr_d(i64 %a) nounwind { -+; CHECK-LABEL: lsx_vreplgr2vr_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vreplgr2vr.d $vr0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 %a) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replve.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replve.ll -new file mode 100644 -index 000000000000..3ba184dad052 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replve.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vreplve_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK-LABEL: lsx_vreplve_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vreplve.b $vr0, $vr0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vreplve_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK-LABEL: lsx_vreplve_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vreplve.h $vr0, $vr0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vreplve_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK-LABEL: lsx_vreplve_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vreplve.w $vr0, $vr0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vreplve_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK-LABEL: lsx_vreplve_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vreplve.d $vr0, $vr0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei.ll -new file mode 100644 -index 000000000000..9b8af1878cb8 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vreplvei_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vreplvei_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vreplvei.b $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> %va, i32 15) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vreplvei_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vreplvei_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vreplvei.h $vr0, $vr0, 7 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> %va, i32 7) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vreplvei_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vreplvei_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> %va, i32 3) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vreplvei_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vreplvei_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vreplvei.d $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> %va, i32 1) -+ ret <2 x i64> %res -+} -diff --git 
a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr.ll -new file mode 100644 -index 000000000000..df8650677147 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vrotr_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vrotr_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vrotr.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vrotr_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vrotr_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vrotr.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vrotr_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vrotr_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vrotr.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vrotr_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vrotr_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vrotr.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vrotri_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vrotri_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vrotri.b $vr0, $vr0, 7 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> %va, i32 7) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vrotri_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vrotri_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vrotri.h $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> %va, i32 15) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vrotri_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vrotri_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vrotri.w $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> %va, i32 31) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vrotri_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vrotri_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vrotri.d $vr0, $vr0, 63 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> %va, i32 63) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sadd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sadd.ll -new file mode 100644 -index 
000000000000..a54f955766df ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sadd.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vsadd_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsadd_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsadd.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vsadd_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsadd_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsadd.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vsadd_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsadd_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsadd.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vsadd_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsadd_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsadd.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vsadd_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsadd_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsadd.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vsadd_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsadd_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsadd.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vsadd_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsadd_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsadd.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vsadd_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsadd_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsadd.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat.ll -new file mode 100644 -index 000000000000..4286842a63b9 
---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsat_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vsat_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsat.b $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> %va, i32 1) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsat_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vsat_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsat.h $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> %va, i32 1) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsat_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vsat_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsat.w $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> %va, i32 1) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsat_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vsat_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsat.d $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> %va, i32 1) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsat_bu(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vsat_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsat.bu $vr0, $vr0, 7 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> %va, i32 7) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsat_hu(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vsat_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsat.hu $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> %va, i32 15) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsat_wu(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vsat_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsat.wu $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> %va, i32 31) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsat_du(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vsat_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsat.du $vr0, $vr0, 63 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> %va, i32 63) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq.ll -new file mode 100644 -index 000000000000..3cb4acd82439 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8>, <16 x i8>) -+ -+define 
<16 x i8> @lsx_vseq_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vseq_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vseq.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vseq_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vseq_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vseq.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vseq_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vseq_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vseq.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vseq_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vseq_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vseq.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vseqi_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vseqi_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vseqi.b $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> %va, i32 15) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vseqi_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vseqi_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vseqi.h $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> %va, i32 15) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vseqi_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vseqi_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vseqi.w $vr0, $vr0, -16 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> %va, i32 -16) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vseqi_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vseqi_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vseqi.d $vr0, $vr0, -16 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> %va, i32 -16) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-set.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-set.ll -new file mode 100644 -index 000000000000..3188fb4e2c2e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-set.ll -@@ -0,0 +1,38 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare i32 @llvm.loongarch.lsx.bz.v(<16 x i8>) -+ -+define i32 @lsx_bz_v(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_bz_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vseteqz.v $fcc0, $vr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB0_2 -+; CHECK-NEXT: # %bb.1: # %entry 
-+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB0_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> %va) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.bnz.v(<16 x i8>) -+ -+define i32 @lsx_bnz_v(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_bnz_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsetnez.v $fcc0, $vr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB1_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB1_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> %va) -+ ret i32 %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setallnez.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setallnez.ll -new file mode 100644 -index 000000000000..22e01922e87b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setallnez.ll -@@ -0,0 +1,74 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare i32 @llvm.loongarch.lsx.bnz.b(<16 x i8>) -+ -+define i32 @lsx_bnz_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_bnz_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsetallnez.b $fcc0, $vr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB0_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB0_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> %va) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.bnz.h(<8 x i16>) -+ -+define i32 @lsx_bnz_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_bnz_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsetallnez.h $fcc0, $vr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB1_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB1_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> %va) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.bnz.w(<4 x i32>) -+ -+define i32 @lsx_bnz_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_bnz_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsetallnez.w $fcc0, $vr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB2_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB2_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> %va) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.bnz.d(<2 x i64>) -+ -+define i32 @lsx_bnz_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_bnz_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsetallnez.d $fcc0, $vr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB3_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB3_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> %va) -+ ret i32 %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setanyeqz.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setanyeqz.ll -new file mode 100644 -index 000000000000..96c79c10e468 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setanyeqz.ll -@@ -0,0 +1,74 @@ -+; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare i32 @llvm.loongarch.lsx.bz.b(<16 x i8>) -+ -+define i32 @lsx_bz_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_bz_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsetanyeqz.b $fcc0, $vr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB0_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB0_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> %va) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.bz.h(<8 x i16>) -+ -+define i32 @lsx_bz_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_bz_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsetanyeqz.h $fcc0, $vr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB1_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB1_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> %va) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.bz.w(<4 x i32>) -+ -+define i32 @lsx_bz_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_bz_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsetanyeqz.w $fcc0, $vr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB2_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB2_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> %va) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.bz.d(<2 x i64>) -+ -+define i32 @lsx_bz_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_bz_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsetanyeqz.d $fcc0, $vr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB3_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB3_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> %va) -+ ret i32 %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf.ll -new file mode 100644 -index 000000000000..f5d516521e45 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8>, <16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vshuf_b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { -+; CHECK-LABEL: lsx_vshuf_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16>, <8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vshuf_h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { -+; CHECK-LABEL: lsx_vshuf_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vshuf.h $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32>, <4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vshuf_w(<4 x i32> %va, <4 x i32> 
%vb, <4 x i32> %vc) nounwind { -+; CHECK-LABEL: lsx_vshuf_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vshuf.w $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64>, <2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vshuf_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { -+; CHECK-LABEL: lsx_vshuf_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vshuf.d $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i.ll -new file mode 100644 -index 000000000000..1ad5f2af5591 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vshuf4i_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vshuf4i_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 255 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> %va, i32 255) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vshuf4i_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vshuf4i_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vshuf4i.h $vr0, $vr0, 255 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> %va, i32 255) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vshuf4i_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vshuf4i_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 255 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> %va, i32 255) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vshuf4i_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vshuf4i_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 255 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> %va, <2 x i64> %vb, i32 255) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-signcov.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-signcov.ll -new file mode 100644 -index 000000000000..3997b0cc995c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-signcov.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vsigncov_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsigncov_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsigncov.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16>, <8 x i16>) -+ -+define 
<8 x i16> @lsx_vsigncov_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsigncov_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsigncov.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vsigncov_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsigncov_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsigncov.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vsigncov_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsigncov_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsigncov.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle.ll -new file mode 100644 -index 000000000000..5a9d5f06e63f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle.ll -@@ -0,0 +1,194 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vsle_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsle_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsle.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vsle_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsle_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsle.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vsle_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsle_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsle.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vsle_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsle_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsle.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vslei_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vslei_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslei.b $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> %va, i32 15) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vslei_h(<8 x i16> %va) nounwind { -+; 
CHECK-LABEL: lsx_vslei_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslei.h $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> %va, i32 15) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vslei_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vslei_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslei.w $vr0, $vr0, -16 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> %va, i32 -16) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vslei_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vslei_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslei.d $vr0, $vr0, -16 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> %va, i32 -16) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vsle_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsle_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsle.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vsle_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsle_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsle.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vsle_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsle_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsle.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vsle_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsle_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsle.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vslei_bu(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vslei_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslei.bu $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> %va, i32 1) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vslei_hu(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vslei_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslei.hu $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> %va, i32 1) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vslei_wu(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vslei_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslei.wu $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> %va, i32 31) -+ ret 
<4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vslei_du(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vslei_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslei.du $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> %va, i32 31) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll.ll -new file mode 100644 -index 000000000000..7bc20af41f17 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vsll_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsll_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsll.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vsll_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsll_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsll.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vsll_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsll_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsll.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vsll_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsll_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsll.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vslli_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vslli_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslli.b $vr0, $vr0, 7 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> %va, i32 7) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vslli_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vslli_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslli.h $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> %va, i32 15) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vslli_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vslli_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslli.w $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> %va, i32 31) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vslli_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vslli_d: -+; CHECK: 
# %bb.0: # %entry -+; CHECK-NEXT: vslli.d $vr0, $vr0, 63 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> %va, i32 63) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil.ll -new file mode 100644 -index 000000000000..29ab70da1ced ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil.ll -@@ -0,0 +1,74 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8>, i32) -+ -+define <8 x i16> @lsx_vsllwil_h_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vsllwil_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsllwil.h.b $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> %va, i32 1) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16>, i32) -+ -+define <4 x i32> @lsx_vsllwil_w_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vsllwil_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsllwil.w.h $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> %va, i32 1) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32>, i32) -+ -+define <2 x i64> @lsx_vsllwil_d_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vsllwil_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsllwil.d.w $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> %va, i32 1) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8>, i32) -+ -+define <8 x i16> @lsx_vsllwil_hu_bu(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vsllwil_hu_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsllwil.hu.bu $vr0, $vr0, 7 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> %va, i32 7) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16>, i32) -+ -+define <4 x i32> @lsx_vsllwil_wu_hu(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vsllwil_wu_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsllwil.wu.hu $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> %va, i32 15) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32>, i32) -+ -+define <2 x i64> @lsx_vsllwil_du_wu(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vsllwil_du_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsllwil.du.wu $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> %va, i32 31) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt.ll -new file mode 100644 -index 000000000000..18683e9dc46f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt.ll -@@ -0,0 +1,194 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vslt_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vslt_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslt.b $vr0, $vr0, $vr1 -+; 
CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vslt_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vslt_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslt.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vslt_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vslt_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslt.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vslt_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vslt_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslt.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vslti_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vslti_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslti.b $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> %va, i32 15) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vslti_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vslti_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslti.h $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> %va, i32 15) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vslti_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vslti_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslti.w $vr0, $vr0, -16 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> %va, i32 -16) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vslti_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vslti_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslti.d $vr0, $vr0, -16 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> %va, i32 -16) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vslt_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vslt_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslt.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vslt_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vslt_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslt.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32>, <4 x 
i32>) -+ -+define <4 x i32> @lsx_vslt_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vslt_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslt.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vslt_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vslt_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslt.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vslti_bu(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vslti_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslti.bu $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> %va, i32 1) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vslti_hu(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vslti_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslti.hu $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> %va, i32 1) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vslti_wu(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vslti_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslti.wu $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> %va, i32 31) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vslti_du(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vslti_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslti.du $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> %va, i32 31) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra.ll -new file mode 100644 -index 000000000000..e85c8464c18e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vsra_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsra_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsra.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vsra_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsra_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsra.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vsra_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsra_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsra.w 
$vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vsra_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsra_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsra.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrai_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vsrai_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrai.b $vr0, $vr0, 7 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> %va, i32 7) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrai_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vsrai_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrai.h $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> %va, i32 15) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrai_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vsrai_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrai.w $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> %va, i32 31) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrai_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vsrai_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrai.d $vr0, $vr0, 63 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> %va, i32 63) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sran.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sran.ll -new file mode 100644 -index 000000000000..4ffe5a704c2c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sran.ll -@@ -0,0 +1,38 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16>, <8 x i16>) -+ -+define <16 x i8> @lsx_vsran_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsran_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsran.b.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32>, <4 x i32>) -+ -+define <8 x i16> @lsx_vsran_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsran_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsran.h.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64>, <2 x i64>) -+ -+define <4 x i32> @lsx_vsran_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsran_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsran.w.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <4 x i32> 
%res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani.ll -new file mode 100644 -index 000000000000..717c641616c8 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrani_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrani_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrani.b.h $vr0, $vr1, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 15) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrani_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrani_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrani.h.w $vr0, $vr1, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 31) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrani_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrani_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrani.w.d $vr0, $vr1, 63 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 63) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrani_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrani_d_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrani.d.q $vr0, $vr1, 127 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 127) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar.ll -new file mode 100644 -index 000000000000..8b52b7ac9631 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vsrar_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrar_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrar.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vsrar_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrar_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrar.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vsrar_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrar_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrar.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call 
<4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vsrar_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrar_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrar.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrari_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vsrari_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrari.b $vr0, $vr0, 7 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> %va, i32 7) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrari_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vsrari_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrari.h $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> %va, i32 15) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrari_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vsrari_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrari.w $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> %va, i32 31) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrari_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vsrari_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrari.d $vr0, $vr0, 63 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> %va, i32 63) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarn.ll -new file mode 100644 -index 000000000000..d4cdfb5359ea ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarn.ll -@@ -0,0 +1,38 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16>, <8 x i16>) -+ -+define <16 x i8> @lsx_vsrarn_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrarn_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrarn.b.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32>, <4 x i32>) -+ -+define <8 x i16> @lsx_vsrarn_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrarn_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrarn.h.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64>, <2 x i64>) -+ -+define <4 x i32> @lsx_vsrarn_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrarn_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrarn.w.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <4 x i32> %res -+} -diff 
--git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni.ll -new file mode 100644 -index 000000000000..2253e88372fc ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrarni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrarni_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrarni.b.h $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrarni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrarni_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrarni.h.w $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrarni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrarni_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrarni.w.d $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrarni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrarni_d_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrarni.d.q $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl.ll -new file mode 100644 -index 000000000000..1cddd9622233 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vsrl_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrl_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrl.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vsrl_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrl_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrl.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vsrl_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrl_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrl.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> 
@llvm.loongarch.lsx.vsrl.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vsrl_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrl_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrl.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrli_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vsrli_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrli.b $vr0, $vr0, 7 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> %va, i32 7) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrli_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vsrli_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrli.h $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> %va, i32 15) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrli_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vsrli_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrli.w $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> %va, i32 31) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrli_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vsrli_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrli.d $vr0, $vr0, 63 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> %va, i32 63) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srln.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srln.ll -new file mode 100644 -index 000000000000..1c9b23243ffb ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srln.ll -@@ -0,0 +1,38 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16>, <8 x i16>) -+ -+define <16 x i8> @lsx_vsrln_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrln_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrln.b.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32>, <4 x i32>) -+ -+define <8 x i16> @lsx_vsrln_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrln_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrln.h.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64>, <2 x i64>) -+ -+define <4 x i32> @lsx_vsrln_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrln_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrln.w.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <4 x i32> %res -+} -diff --git 
a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni.ll -new file mode 100644 -index 000000000000..6e523efa1824 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrlni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrlni_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlni.b.h $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrlni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrlni_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlni.h.w $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrlni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrlni_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlni.w.d $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrlni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrlni_d_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlni.d.q $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr.ll -new file mode 100644 -index 000000000000..51638fa1a47f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vsrlr_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrlr_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlr.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vsrlr_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrlr_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlr.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vsrlr_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrlr_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlr.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> 
@llvm.loongarch.lsx.vsrlr.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vsrlr_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrlr_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlr.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrlri_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vsrlri_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlri.b $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> %va, i32 1) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrlri_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vsrlri_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlri.h $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> %va, i32 1) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrlri_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vsrlri_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlri.w $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> %va, i32 1) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrlri_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vsrlri_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlri.d $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> %va, i32 1) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrn.ll -new file mode 100644 -index 000000000000..893e51396241 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrn.ll -@@ -0,0 +1,38 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16>, <8 x i16>) -+ -+define <16 x i8> @lsx_vsrlrn_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrlrn_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlrn.b.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32>, <4 x i32>) -+ -+define <8 x i16> @lsx_vsrlrn_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrlrn_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlrn.h.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64>, <2 x i64>) -+ -+define <4 x i32> @lsx_vsrlrn_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrlrn_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlrn.w.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <4 x i32> %res -+} -diff --git 
a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni.ll -new file mode 100644 -index 000000000000..d1ea450d2237 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrlrni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrlrni_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlrni.b.h $vr0, $vr1, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 15) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrlrni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrlrni_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlrni.h.w $vr0, $vr1, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 31) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrlrni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrlrni_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlrni.w.d $vr0, $vr1, 63 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 63) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrlrni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrlrni_d_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlrni.d.q $vr0, $vr1, 127 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 127) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssran.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssran.ll -new file mode 100644 -index 000000000000..cecccbb730c9 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssran.ll -@@ -0,0 +1,74 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16>, <8 x i16>) -+ -+define <16 x i8> @lsx_vssran_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssran_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssran.b.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32>, <4 x i32>) -+ -+define <8 x i16> @lsx_vssran_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssran_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssran.h.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64>, <2 x i64>) -+ -+define <4 x i32> @lsx_vssran_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssran_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssran.w.d $vr0, $vr0, 
$vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16>, <8 x i16>) -+ -+define <16 x i8> @lsx_vssran_bu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssran_bu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssran.bu.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32>, <4 x i32>) -+ -+define <8 x i16> @lsx_vssran_hu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssran_hu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssran.hu.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64>, <2 x i64>) -+ -+define <4 x i32> @lsx_vssran_wu_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssran_wu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssran.wu.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <4 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani.ll -new file mode 100644 -index 000000000000..57b8eb169866 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrani_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrani_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrani.b.h $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrani_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrani_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrani.h.w $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrani_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrani_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrani.w.d $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrani_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrani_d_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrani.d.q $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> 
@lsx_vssrani_bu_h(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrani_bu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrani.bu.h $vr0, $vr1, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 15) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrani_hu_w(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrani_hu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrani.hu.w $vr0, $vr1, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 31) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrani_wu_d(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrani_wu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrani.wu.d $vr0, $vr1, 63 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 63) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrani_du_q(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrani_du_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrani.du.q $vr0, $vr1, 127 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> %va, <2 x i64> %vb, i32 127) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarn.ll -new file mode 100644 -index 000000000000..c6b7d9ec8e1d ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarn.ll -@@ -0,0 +1,74 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16>, <8 x i16>) -+ -+define <16 x i8> @lsx_vssrarn_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrarn_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrarn.b.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32>, <4 x i32>) -+ -+define <8 x i16> @lsx_vssrarn_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrarn_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrarn.h.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64>, <2 x i64>) -+ -+define <4 x i32> @lsx_vssrarn_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrarn_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrarn.w.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16>, <8 x i16>) -+ -+define <16 x i8> @lsx_vssrarn_bu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrarn_bu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrarn.bu.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> 
@llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32>, <4 x i32>) -+ -+define <8 x i16> @lsx_vssrarn_hu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrarn_hu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrarn.hu.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64>, <2 x i64>) -+ -+define <4 x i32> @lsx_vssrarn_wu_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrarn_wu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrarn.wu.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <4 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni.ll -new file mode 100644 -index 000000000000..1a2e91962ac3 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrarni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrarni_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrarni.b.h $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrarni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrarni_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrarni.h.w $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrarni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrarni_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrarni.w.d $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrarni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrarni_d_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrarni.d.q $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrarni_bu_h(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrarni_bu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrarni.bu.h $vr0, $vr1, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 15) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> 
@lsx_vssrarni_hu_w(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrarni_hu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrarni.hu.w $vr0, $vr1, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 31) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrarni_wu_d(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrarni_wu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrarni.wu.d $vr0, $vr1, 63 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 63) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrarni_du_q(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrarni_du_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrarni.du.q $vr0, $vr1, 127 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 127) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrln.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrln.ll -new file mode 100644 -index 000000000000..697ccc3962a8 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrln.ll -@@ -0,0 +1,74 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16>, <8 x i16>) -+ -+define <16 x i8> @lsx_vssrln_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrln_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrln.b.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32>, <4 x i32>) -+ -+define <8 x i16> @lsx_vssrln_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrln_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrln.h.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64>, <2 x i64>) -+ -+define <4 x i32> @lsx_vssrln_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrln_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrln.w.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16>, <8 x i16>) -+ -+define <16 x i8> @lsx_vssrln_bu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrln_bu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrln.bu.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32>, <4 x i32>) -+ -+define <8 x i16> @lsx_vssrln_hu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrln_hu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrln.hu.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> 
@llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64>, <2 x i64>) -+ -+define <4 x i32> @lsx_vssrln_wu_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrln_wu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrln.wu.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <4 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni.ll -new file mode 100644 -index 000000000000..8dd41e7abe87 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrlni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlni_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlni.b.h $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrlni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlni_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlni.h.w $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrlni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlni_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlni.w.d $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrlni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlni_d_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlni.d.q $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrlni_bu_h(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlni_bu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlni.bu.h $vr0, $vr1, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 15) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrlni_hu_w(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlni_hu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlni.hu.w $vr0, $vr1, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 31) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrlni_wu_d(<4 x i32> 
%va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlni_wu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlni.wu.d $vr0, $vr1, 63 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 63) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrlni_du_q(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlni_du_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlni.du.q $vr0, $vr1, 127 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 127) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrn.ll -new file mode 100644 -index 000000000000..a8e76cbaa7fd ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrn.ll -@@ -0,0 +1,74 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16>, <8 x i16>) -+ -+define <16 x i8> @lsx_vssrlrn_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlrn_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlrn.b.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32>, <4 x i32>) -+ -+define <8 x i16> @lsx_vssrlrn_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlrn_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlrn.h.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64>, <2 x i64>) -+ -+define <4 x i32> @lsx_vssrlrn_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlrn_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlrn.w.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16>, <8 x i16>) -+ -+define <16 x i8> @lsx_vssrlrn_bu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlrn_bu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlrn.bu.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32>, <4 x i32>) -+ -+define <8 x i16> @lsx_vssrlrn_hu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlrn_hu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlrn.hu.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64>, <2 x i64>) -+ -+define <4 x i32> @lsx_vssrlrn_wu_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlrn_wu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlrn.wu.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> %va, <2 x 
i64> %vb) -+ ret <4 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni.ll -new file mode 100644 -index 000000000000..869e81b2b09d ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrlrni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlrni_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlrni.b.h $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrlrni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlrni_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlrni.h.w $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrlrni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlrni_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlrni.w.d $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrlrni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlrni_d_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlrni.d.q $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrlrni_bu_h(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlrni_bu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlrni.bu.h $vr0, $vr1, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 15) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrlrni_hu_w(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlrni_hu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlrni.hu.w $vr0, $vr1, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 31) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrlrni_wu_d(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlrni_wu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlrni.wu.d $vr0, $vr1, 63 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 63) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrlrni_du_q(<2 x 
i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlrni_du_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlrni.du.q $vr0, $vr1, 127 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 127) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssub.ll -new file mode 100644 -index 000000000000..c594b426d650 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssub.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vssub_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vssub_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssub.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vssub_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssub_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssub.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vssub_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssub_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssub.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vssub_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssub_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssub.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vssub_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vssub_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssub.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vssub_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssub_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssub.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vssub_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssub_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssub.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vssub_du(<2 x i64> 
%va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssub_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssub.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st.ll -new file mode 100644 -index 000000000000..798f509f2318 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare void @llvm.loongarch.lsx.vst(<16 x i8>, i8*, i32) -+ -+define void @lsx_vst(<16 x i8> %va, i8* %p) nounwind { -+; CHECK-LABEL: lsx_vst: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vst $vr0, $a0, -2048 -+; CHECK-NEXT: ret -+entry: -+ call void @llvm.loongarch.lsx.vst(<16 x i8> %va, i8* %p, i32 -2048) -+ ret void -+} -+ -+declare void @llvm.loongarch.lsx.vstx(<16 x i8>, i8*, i64) -+ -+define void @lsx_vstx(<16 x i8> %va, i8* %p, i64 %c) nounwind { -+; CHECK-LABEL: lsx_vstx: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vstx $vr0, $a0, $a1 -+; CHECK-NEXT: ret -+entry: -+ call void @llvm.loongarch.lsx.vstx(<16 x i8> %va, i8* %p, i64 %c) -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm.ll -new file mode 100644 -index 000000000000..6b9e7a9d7462 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare void @llvm.loongarch.lsx.vstelm.b(<16 x i8>, i8*, i32, i32) -+ -+define void @lsx_vstelm_b(<16 x i8> %va, i8* %p) nounwind { -+; CHECK-LABEL: lsx_vstelm_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vstelm.b $vr0, $a0, 1, 15 -+; CHECK-NEXT: ret -+entry: -+ call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 1, i32 15) -+ ret void -+} -+ -+declare void @llvm.loongarch.lsx.vstelm.h(<8 x i16>, i8*, i32, i32) -+ -+define void @lsx_vstelm_h(<8 x i16> %va, i8* %p) nounwind { -+; CHECK-LABEL: lsx_vstelm_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vstelm.h $vr0, $a0, 2, 7 -+; CHECK-NEXT: ret -+entry: -+ call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 2, i32 7) -+ ret void -+} -+ -+declare void @llvm.loongarch.lsx.vstelm.w(<4 x i32>, i8*, i32, i32) -+ -+define void @lsx_vstelm_w(<4 x i32> %va, i8* %p) nounwind { -+; CHECK-LABEL: lsx_vstelm_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vstelm.w $vr0, $a0, 4, 3 -+; CHECK-NEXT: ret -+entry: -+ call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 4, i32 3) -+ ret void -+} -+ -+declare void @llvm.loongarch.lsx.vstelm.d(<2 x i64>, i8*, i32, i32) -+ -+define void @lsx_vstelm_d(<2 x i64> %va, i8* %p) nounwind { -+; CHECK-LABEL: lsx_vstelm_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vstelm.d $vr0, $a0, 8, 1 -+; CHECK-NEXT: ret -+entry: -+ call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 8, i32 1) -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sub.ll -new file mode 100644 -index 000000000000..5c04a3d8de0d ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sub.ll -@@ -0,0 +1,62 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
-+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vsub_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsub_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsub.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vsub_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsub_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsub.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vsub_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsub_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsub.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vsub_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsub_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsub.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vsub_q(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsub_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsub.q $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi.ll -new file mode 100644 -index 000000000000..304a4e4a78cc ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsubi_bu(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vsubi_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubi.bu $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> %va, i32 31) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsubi_hu(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vsubi_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubi.hu $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> %va, i32 31) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsubi_wu(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vsubi_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubi.wu $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> %va, i32 31) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64>, i32) -+ -+define 
<2 x i64> @lsx_vsubi_du(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vsubi_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubi.du $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> %va, i32 31) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subw.ll -new file mode 100644 -index 000000000000..48100db74334 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subw.ll -@@ -0,0 +1,194 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vsubwev_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsubwev_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubwev.h.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vsubwev_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsubwev_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubwev.w.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vsubwev_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsubwev_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubwev.d.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vsubwev_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsubwev_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubwev.q.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vsubwev_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsubwev_h_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubwev.h.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vsubwev_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsubwev_w_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubwev.w.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vsubwev_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsubwev_d_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubwev.d.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res 
-+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vsubwev_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsubwev_q_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubwev.q.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vsubwod_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsubwod_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubwod.h.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vsubwod_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsubwod_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubwod.w.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vsubwod_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsubwod_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubwod.d.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vsubwod_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsubwod_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubwod.q.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vsubwod_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsubwod_h_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubwod.h.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vsubwod_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsubwod_w_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubwod.w.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vsubwod_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsubwod_d_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubwod.d.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vsubwod_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsubwod_q_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubwod.q.du $vr0, $vr0, $vr1 -+; 
CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xor.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xor.ll -new file mode 100644 -index 000000000000..72a1fe93c2c0 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xor.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vxor_v(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vxor_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori.ll -new file mode 100644 -index 000000000000..09669cd5ac14 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vxori_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vxori_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vxori.b $vr0, $vr0, 3 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> %va, i32 3) -+ ret <16 x i8> %res -+} --- -2.20.1 - - -From fd469d4a3c3b439f40accda691597502bc444a99 Mon Sep 17 00:00:00 2001 -From: chenli -Date: Sat, 19 Aug 2023 17:12:27 +0800 -Subject: [PATCH 05/35] [LoongArch] Add LASX intrinsic testcases - -Depends on D155830 - -Reviewed By: SixWeining - -Differential Revision: https://reviews.llvm.org/D155835 - -(cherry picked from commit 83311b2b5d1b9869f9a7b265994394ea898448a2) ---- - .../CodeGen/LoongArch/lasx/intrinsic-absd.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-add.ll | 62 ++ - .../CodeGen/LoongArch/lasx/intrinsic-adda.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-addi.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-addw.ll | 290 ++++++++++ - .../CodeGen/LoongArch/lasx/intrinsic-and.ll | 14 + - .../CodeGen/LoongArch/lasx/intrinsic-andi.ll | 14 + - .../CodeGen/LoongArch/lasx/intrinsic-andn.ll | 14 + - .../CodeGen/LoongArch/lasx/intrinsic-avg.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-avgr.ll | 98 ++++ - .../LoongArch/lasx/intrinsic-bitclr.ll | 98 ++++ - .../LoongArch/lasx/intrinsic-bitrev.ll | 98 ++++ - .../LoongArch/lasx/intrinsic-bitsel.ll | 14 + - .../LoongArch/lasx/intrinsic-bitseli.ll | 14 + - .../LoongArch/lasx/intrinsic-bitset.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-bsll.ll | 14 + - .../CodeGen/LoongArch/lasx/intrinsic-bsrl.ll | 14 + - .../CodeGen/LoongArch/lasx/intrinsic-clo.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-clz.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-div.ll | 98 ++++ - .../LoongArch/lasx/intrinsic-ext2xv.ll | 146 +++++ - .../CodeGen/LoongArch/lasx/intrinsic-exth.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-extl.ll | 26 + - .../LoongArch/lasx/intrinsic-extrins.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-fadd.ll | 26 + - .../LoongArch/lasx/intrinsic-fclass.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-fcmp.ll | 530 ++++++++++++++++++ - 
.../CodeGen/LoongArch/lasx/intrinsic-fcvt.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-fcvth.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-fcvtl.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-fdiv.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-ffint.ll | 86 +++ - .../CodeGen/LoongArch/lasx/intrinsic-flogb.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-fmadd.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-fmax.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-fmaxa.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-fmin.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-fmina.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-fmsub.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-fmul.ll | 26 + - .../LoongArch/lasx/intrinsic-fnmadd.ll | 26 + - .../LoongArch/lasx/intrinsic-fnmsub.ll | 26 + - .../LoongArch/lasx/intrinsic-frecip.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-frint.ll | 122 ++++ - .../LoongArch/lasx/intrinsic-frsqrt.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-frstp.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-fsqrt.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-fsub.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-ftint.ll | 350 ++++++++++++ - .../CodeGen/LoongArch/lasx/intrinsic-haddw.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-hsubw.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-ilv.ll | 98 ++++ - .../LoongArch/lasx/intrinsic-insgr2vr.ll | 28 + - .../LoongArch/lasx/intrinsic-insve0.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-ld.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-ldi.ll | 62 ++ - .../LoongArch/lasx/intrinsic-ldrepl.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-madd.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-maddw.ll | 290 ++++++++++ - .../CodeGen/LoongArch/lasx/intrinsic-max.ll | 194 +++++++ - .../CodeGen/LoongArch/lasx/intrinsic-min.ll | 194 +++++++ - .../CodeGen/LoongArch/lasx/intrinsic-mod.ll | 98 ++++ - .../LoongArch/lasx/intrinsic-mskgez.ll | 14 + - .../LoongArch/lasx/intrinsic-mskltz.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-msknz.ll | 14 + - .../CodeGen/LoongArch/lasx/intrinsic-msub.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-muh.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-mul.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-mulw.ll | 290 ++++++++++ - .../CodeGen/LoongArch/lasx/intrinsic-neg.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-nor.ll | 14 + - .../CodeGen/LoongArch/lasx/intrinsic-nori.ll | 14 + - .../CodeGen/LoongArch/lasx/intrinsic-or.ll | 14 + - .../CodeGen/LoongArch/lasx/intrinsic-ori.ll | 14 + - .../CodeGen/LoongArch/lasx/intrinsic-orn.ll | 14 + - .../CodeGen/LoongArch/lasx/intrinsic-pack.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-pcnt.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-perm.ll | 14 + - .../CodeGen/LoongArch/lasx/intrinsic-permi.ll | 38 ++ - .../CodeGen/LoongArch/lasx/intrinsic-pick.ll | 98 ++++ - .../LoongArch/lasx/intrinsic-pickve.ll | 50 ++ - .../LoongArch/lasx/intrinsic-pickve2gr.ll | 53 ++ - .../LoongArch/lasx/intrinsic-repl128vei.ll | 50 ++ - .../LoongArch/lasx/intrinsic-replgr2vr.ll | 50 ++ - .../LoongArch/lasx/intrinsic-replve.ll | 50 ++ - .../LoongArch/lasx/intrinsic-replve0.ll | 62 ++ - .../CodeGen/LoongArch/lasx/intrinsic-rotr.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-sadd.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-sat.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-seq.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-set.ll | 38 ++ - .../LoongArch/lasx/intrinsic-setallnez.ll | 74 +++ - 
.../LoongArch/lasx/intrinsic-setanyeqz.ll | 74 +++ - .../CodeGen/LoongArch/lasx/intrinsic-shuf.ll | 50 ++ - .../LoongArch/lasx/intrinsic-shuf4i.ll | 50 ++ - .../LoongArch/lasx/intrinsic-signcov.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-sle.ll | 194 +++++++ - .../CodeGen/LoongArch/lasx/intrinsic-sll.ll | 98 ++++ - .../LoongArch/lasx/intrinsic-sllwil.ll | 74 +++ - .../CodeGen/LoongArch/lasx/intrinsic-slt.ll | 194 +++++++ - .../CodeGen/LoongArch/lasx/intrinsic-sra.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-sran.ll | 38 ++ - .../CodeGen/LoongArch/lasx/intrinsic-srani.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-srar.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-srarn.ll | 38 ++ - .../LoongArch/lasx/intrinsic-srarni.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-srl.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-srln.ll | 38 ++ - .../CodeGen/LoongArch/lasx/intrinsic-srlni.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-srlr.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-srlrn.ll | 38 ++ - .../LoongArch/lasx/intrinsic-srlrni.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-ssran.ll | 74 +++ - .../LoongArch/lasx/intrinsic-ssrani.ll | 98 ++++ - .../LoongArch/lasx/intrinsic-ssrarn.ll | 74 +++ - .../LoongArch/lasx/intrinsic-ssrarni.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-ssrln.ll | 74 +++ - .../LoongArch/lasx/intrinsic-ssrlni.ll | 98 ++++ - .../LoongArch/lasx/intrinsic-ssrlrn.ll | 74 +++ - .../LoongArch/lasx/intrinsic-ssrlrni.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-ssub.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-st.ll | 27 + - .../CodeGen/LoongArch/lasx/intrinsic-stelm.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-sub.ll | 62 ++ - .../CodeGen/LoongArch/lasx/intrinsic-subi.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-subw.ll | 194 +++++++ - .../CodeGen/LoongArch/lasx/intrinsic-xor.ll | 14 + - .../CodeGen/LoongArch/lasx/intrinsic-xori.ll | 14 + - 128 files changed, 9154 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-absd.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-add.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-adda.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-addw.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-and.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-andn.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-avg.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-avgr.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitsel.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-clo.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-clz.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-div.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ext2xv.ll - create mode 100644 
llvm/test/CodeGen/LoongArch/lasx/intrinsic-exth.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-extl.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fadd.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fclass.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcmp.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvt.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvth.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvtl.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fdiv.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ffint.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-flogb.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmadd.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmax.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmaxa.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmin.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmina.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmsub.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmul.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmadd.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmsub.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecip.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-frint.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrt.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsqrt.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsub.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ftint.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-haddw.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-hsubw.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ilv.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-madd.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-maddw.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-max.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-min.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-mod.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskgez.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskltz.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-msknz.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-msub.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-muh.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-mul.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-mulw.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-neg.ll - create mode 
100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-nor.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-or.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-orn.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pack.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pcnt.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-perm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pick.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-replgr2vr.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve0.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sadd.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-set.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-setallnez.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-setanyeqz.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-signcov.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sran.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarn.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srln.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrn.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssran.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarn.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrln.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni.ll - create mode 100644 
llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrn.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssub.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-st.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sub.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-subw.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-xor.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori.ll - -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-absd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-absd.ll -new file mode 100644 -index 000000000000..bf54f44357b0 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-absd.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvabsd_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvabsd_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvabsd.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvabsd_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvabsd_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvabsd.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvabsd_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvabsd_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvabsd.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvabsd_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvabsd_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvabsd.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvabsd_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvabsd_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvabsd.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvabsd_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvabsd_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvabsd.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> 
@llvm.loongarch.lasx.xvabsd.wu(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvabsd_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvabsd_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvabsd.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvabsd_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvabsd_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvabsd.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-add.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-add.ll -new file mode 100644 -index 000000000000..0c2f2ace29fc ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-add.ll -@@ -0,0 +1,62 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvadd_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvadd_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvadd_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvadd_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvadd_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvadd_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvadd_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvadd_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvadd_q(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvadd_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvadd.q $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-adda.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-adda.ll -new file mode 100644 -index 000000000000..c1258d53e913 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-adda.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s 
| FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvadda_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvadda_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvadda.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvadda_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvadda_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvadda.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvadda_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvadda_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvadda.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvadda_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvadda_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvadda.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi.ll -new file mode 100644 -index 000000000000..09b5d07a0151 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvaddi_bu(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvaddi_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddi.bu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvaddi_hu(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvaddi_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddi.hu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvaddi_wu(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvaddi_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddi.wu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvaddi_du(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvaddi_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddi.du $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git 
a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addw.ll -new file mode 100644 -index 000000000000..ef7a1b5a50ef ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addw.ll -@@ -0,0 +1,290 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvaddwev_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwev_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwev.h.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvaddwev_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwev_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwev.w.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvaddwev_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwev_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwev.d.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvaddwev_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwev_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwev.q.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvaddwev_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwev_h_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwev.h.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvaddwev_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwev_w_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwev.w.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvaddwev_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwev_d_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwev.d.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvaddwev_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwev_q_du: -+; 
CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwev.q.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvaddwev_h_bu_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwev_h_bu_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwev.h.bu.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvaddwev_w_hu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwev_w_hu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwev.w.hu.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvaddwev_d_wu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwev_d_wu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwev.d.wu.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvaddwev_q_du_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwev_q_du_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwev.q.du.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvaddwod_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwod_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwod.h.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvaddwod_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwod_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwod.w.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvaddwod_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwod_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwod.d.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvaddwod_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwod_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwod.q.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> 
@llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvaddwod_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwod_h_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwod.h.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvaddwod_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwod_w_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwod.w.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvaddwod_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwod_d_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwod.d.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvaddwod_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwod_q_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwod.q.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvaddwod_h_bu_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwod_h_bu_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwod.h.bu.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvaddwod_w_hu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwod_w_hu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwod.w.hu.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvaddwod_d_wu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwod_d_wu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwod.d.wu.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvaddwod_q_du_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwod_q_du_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwod.q.du.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git 
a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-and.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-and.ll -new file mode 100644 -index 000000000000..15f3a8094770 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-and.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvand_v(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvand_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi.ll -new file mode 100644 -index 000000000000..88cf142d6968 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvandi_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvandi_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvandi.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andn.ll -new file mode 100644 -index 000000000000..f385ef3661cb ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andn.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvandn_v(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvandn_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvandn.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avg.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avg.ll -new file mode 100644 -index 000000000000..488d3b96b003 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avg.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvavg_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvavg_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvavg.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvavg_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvavg_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvavg.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> %va, <16 x i16> %vb) 
-+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvavg_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvavg_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvavg.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvavg_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvavg_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvavg.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvavg_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvavg_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvavg.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvavg_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvavg_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvavg.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvavg_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvavg_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvavg.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvavg_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvavg_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvavg.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avgr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avgr.ll -new file mode 100644 -index 000000000000..b5ab5a5366aa ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avgr.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvavgr_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvavgr_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvavgr.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvavgr_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvavgr_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvavgr.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> 
@llvm.loongarch.lasx.xvavgr.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvavgr_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvavgr_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvavgr.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvavgr_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvavgr_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvavgr.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvavgr_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvavgr_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvavgr.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvavgr_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvavgr_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvavgr.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvavgr_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvavgr_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvavgr.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvavgr_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvavgr_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvavgr.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr.ll -new file mode 100644 -index 000000000000..cec71bab2fe8 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvbitclr_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvbitclr_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitclr.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvbitclr_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvbitclr_h: -+; CHECK: # %bb.0: # %entry -+; 
CHECK-NEXT: xvbitclr.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvbitclr_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvbitclr_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitclr.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvbitclr_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvbitclr_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitclr.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbitclri_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvbitclri_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitclri.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvbitclri_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvbitclri_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitclri.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvbitclri_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvbitclri_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitclri.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvbitclri_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvbitclri_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitclri.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev.ll -new file mode 100644 -index 000000000000..fb4f9fbc2e4b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvbitrev_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvbitrev_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitrev.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvbitrev_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvbitrev_h: -+; CHECK: 
# %bb.0: # %entry -+; CHECK-NEXT: xvbitrev.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvbitrev_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvbitrev_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitrev.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvbitrev_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvbitrev_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitrev.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbitrevi_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvbitrevi_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitrevi.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvbitrevi_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvbitrevi_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitrevi.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvbitrevi_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvbitrevi_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitrevi.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvbitrevi_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvbitrevi_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitrevi.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitsel.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitsel.ll -new file mode 100644 -index 000000000000..2e91407590ac ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitsel.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8>, <32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvbitsel_v(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { -+; CHECK-LABEL: lasx_xvbitsel_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitsel.v $xr0, $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli.ll -new file mode 
100644 -index 000000000000..79dd55cbfef9 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbitseli_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvbitseli_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitseli.b $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> %va, <32 x i8> %vb, i32 1) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset.ll -new file mode 100644 -index 000000000000..83d1f0ef60c6 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvbitset_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvbitset_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitset.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvbitset_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvbitset_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitset.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvbitset_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvbitset_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitset.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvbitset_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvbitset_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitset.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbitseti_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvbitseti_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitseti.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvbitseti_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvbitseti_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitseti.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> 
@llvm.loongarch.lasx.xvbitseti.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvbitseti_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvbitseti_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitseti.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvbitseti_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvbitseti_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitseti.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll.ll -new file mode 100644 -index 000000000000..cbb63ced5cc0 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbsll_v(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvbsll_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl.ll -new file mode 100644 -index 000000000000..b0c26cbe3e35 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbsrl_v(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvbsrl_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clo.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clo.ll -new file mode 100644 -index 000000000000..29b2be03d54e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clo.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8>) -+ -+define <32 x i8> @lasx_xvclo_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvclo_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvclo.b $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> %va) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16>) -+ -+define <16 x i16> @lasx_xvclo_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvclo_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvclo.h $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> %va) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32>) -+ -+define <8 x i32> @lasx_xvclo_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvclo_w: 
-+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvclo.w $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64>) -+ -+define <4 x i64> @lasx_xvclo_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvclo_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvclo.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> %va) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clz.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clz.ll -new file mode 100644 -index 000000000000..5247ceedbd14 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clz.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8>) -+ -+define <32 x i8> @lasx_xvclz_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvclz_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvclz.b $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> %va) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16>) -+ -+define <16 x i16> @lasx_xvclz_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvclz_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvclz.h $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> %va) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32>) -+ -+define <8 x i32> @lasx_xvclz_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvclz_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvclz.w $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64>) -+ -+define <4 x i64> @lasx_xvclz_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvclz_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvclz.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> %va) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-div.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-div.ll -new file mode 100644 -index 000000000000..813204092e94 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-div.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvdiv_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvdiv_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvdiv.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvdiv_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvdiv_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvdiv.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> 
@llvm.loongarch.lasx.xvdiv.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvdiv_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvdiv_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvdiv.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvdiv_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvdiv_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvdiv.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvdiv_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvdiv_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvdiv.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvdiv_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvdiv_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvdiv.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvdiv_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvdiv_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvdiv.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvdiv_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvdiv_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvdiv.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ext2xv.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ext2xv.ll -new file mode 100644 -index 000000000000..48721b52af00 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ext2xv.ll -@@ -0,0 +1,146 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8>) -+ -+define <16 x i16> @lasx_vext2xv_h_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_vext2xv_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vext2xv.h.b $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> %va) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8>) -+ -+define <8 x i32> @lasx_vext2xv_w_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_vext2xv_w_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vext2xv.w.b $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> 
@llvm.loongarch.lasx.vext2xv.d.b(<32 x i8>) -+ -+define <4 x i64> @lasx_vext2xv_d_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_vext2xv_d_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vext2xv.d.b $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> %va) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16>) -+ -+define <8 x i32> @lasx_vext2xv_w_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_vext2xv_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vext2xv.w.h $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16>) -+ -+define <4 x i64> @lasx_vext2xv_d_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_vext2xv_d_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vext2xv.d.h $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> %va) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32>) -+ -+define <4 x i64> @lasx_vext2xv_d_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_vext2xv_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vext2xv.d.w $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> %va) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8>) -+ -+define <16 x i16> @lasx_vext2xv_hu_bu(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_vext2xv_hu_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vext2xv.hu.bu $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> %va) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8>) -+ -+define <8 x i32> @lasx_vext2xv_wu_bu(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_vext2xv_wu_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vext2xv.wu.bu $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8>) -+ -+define <4 x i64> @lasx_vext2xv_du_bu(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_vext2xv_du_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vext2xv.du.bu $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> %va) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16>) -+ -+define <8 x i32> @lasx_vext2xv_wu_hu(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_vext2xv_wu_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vext2xv.wu.hu $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16>) -+ -+define <4 x i64> @lasx_vext2xv_du_hu(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_vext2xv_du_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vext2xv.du.hu $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> %va) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32>) -+ -+define <4 x i64> @lasx_vext2xv_du_wu(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_vext2xv_du_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vext2xv.du.wu 
$xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> %va) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-exth.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-exth.ll -new file mode 100644 -index 000000000000..543589e61b12 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-exth.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8>) -+ -+define <16 x i16> @lasx_xvexth_h_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvexth_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvexth.h.b $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> %va) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16>) -+ -+define <8 x i32> @lasx_xvexth_w_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvexth_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvexth.w.h $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32>) -+ -+define <4 x i64> @lasx_xvexth_d_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvexth_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvexth.d.w $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> %va) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64>) -+ -+define <4 x i64> @lasx_xvexth_q_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvexth_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvexth.q.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> %va) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8>) -+ -+define <16 x i16> @lasx_xvexth_hu_bu(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvexth_hu_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvexth.hu.bu $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> %va) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16>) -+ -+define <8 x i32> @lasx_xvexth_wu_hu(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvexth_wu_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvexth.wu.hu $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32>) -+ -+define <4 x i64> @lasx_xvexth_du_wu(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvexth_du_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvexth.du.wu $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> %va) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64>) -+ -+define <4 x i64> @lasx_xvexth_qu_du(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvexth_qu_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvexth.qu.du $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> %va) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extl.ll 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extl.ll -new file mode 100644 -index 000000000000..7040c8c784cd ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extl.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64>) -+ -+define <4 x i64> @lasx_xvextl_q_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvextl_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvextl.q.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> %va) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64>) -+ -+define <4 x i64> @lasx_xvextl_qu_du(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvextl_qu_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvextl.qu.du $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> %va) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins.ll -new file mode 100644 -index 000000000000..c8774a7b29c0 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvextrins_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvextrins_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvextrins.b $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> %va, <32 x i8> %vb, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvextrins_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvextrins_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvextrins.h $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> %va, <16 x i16> %vb, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvextrins_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvextrins_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvextrins.w $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> %va, <8 x i32> %vb, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvextrins_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvextrins_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvextrins.d $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> %va, <4 x i64> %vb, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fadd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fadd.ll -new file mode 100644 -index 000000000000..563a0ce9e384 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fadd.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc 
--mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float>, <8 x float>) -+ -+define <8 x float> @lasx_xvfadd_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfadd_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfadd.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double>, <4 x double>) -+ -+define <4 x double> @lasx_xvfadd_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfadd_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfadd.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fclass.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fclass.ll -new file mode 100644 -index 000000000000..901ca5bb0260 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fclass.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float>) -+ -+define <8 x i32> @lasx_xvfclass_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvfclass_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfclass.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double>) -+ -+define <4 x i64> @lasx_xvfclass_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvfclass_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfclass.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> %va) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcmp.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcmp.ll -new file mode 100644 -index 000000000000..b01f908e71af ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcmp.ll -@@ -0,0 +1,530 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_caf_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_caf_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.caf.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_caf_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_caf_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.caf.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_cun_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_cun_s: -+; CHECK: # %bb.0: # 
%entry -+; CHECK-NEXT: xvfcmp.cun.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_cun_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_cun_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.cun.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_ceq_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_ceq_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.ceq.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_ceq_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_ceq_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.ceq.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_cueq_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_cueq_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.cueq.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_cueq_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_cueq_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.cueq.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_clt_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_clt_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_clt_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_clt_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_cult_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_cult_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.cult.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = 
call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_cult_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_cult_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.cult.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_cle_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_cle_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.cle.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_cle_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_cle_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.cle.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_cule_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_cule_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.cule.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_cule_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_cule_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.cule.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_cne_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_cne_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.cne.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_cne_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_cne_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.cne.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_cor_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_cor_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.cor.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> %va, <8 x float> %vb) -+ ret 
<8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_cor_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_cor_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.cor.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_cune_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_cune_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.cune.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_cune_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_cune_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.cune.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_saf_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_saf_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.saf.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_saf_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_saf_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.saf.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_sun_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_sun_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.sun.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_sun_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_sun_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.sun.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_seq_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_seq_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.seq.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double>, <4 x 
double>) -+ -+define <4 x i64> @lasx_xvfcmp_seq_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_seq_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.seq.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_sueq_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_sueq_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.sueq.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_sueq_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_sueq_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.sueq.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_slt_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_slt_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.slt.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_slt_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_slt_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.slt.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_sult_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_sult_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.sult.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_sult_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_sult_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.sult.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_sle_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_sle_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.sle.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_sle_d(<4 x double> %va, <4 x double> %vb) 
nounwind { -+; CHECK-LABEL: lasx_xvfcmp_sle_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.sle.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_sule_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_sule_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.sule.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_sule_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_sule_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.sule.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_sne_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_sne_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.sne.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_sne_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_sne_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.sne.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_sor_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_sor_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.sor.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_sor_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_sor_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.sor.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_sune_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_sune_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.sune.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_sune_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_sune_d: -+; CHECK: # %bb.0: # %entry -+; 
CHECK-NEXT: xvfcmp.sune.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvt.ll -new file mode 100644 -index 000000000000..82bf1d3df72c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvt.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float>, <8 x float>) -+ -+define <16 x i16> @lasx_xvfcvt_h_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcvt_h_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcvt.h.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> %va, <8 x float> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double>, <4 x double>) -+ -+define <8 x float> @lasx_xvfcvt_s_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcvt_s_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcvt.s.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> %va, <4 x double> %vb) -+ ret <8 x float> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvth.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvth.ll -new file mode 100644 -index 000000000000..e1a6a2923e67 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvth.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16>) -+ -+define <8 x float> @lasx_xvfcvth_s_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvfcvth_s_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcvth.s.h $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> %va) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float>) -+ -+define <4 x double> @lasx_xvfcvth_d_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvfcvth_d_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcvth.d.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> %va) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvtl.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvtl.ll -new file mode 100644 -index 000000000000..0b3e693c7f51 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvtl.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16>) -+ -+define <8 x float> @lasx_xvfcvtl_s_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvfcvtl_s_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcvtl.s.h $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> %va) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float>) -+ -+define <4 x double> @lasx_xvfcvtl_d_s(<8 x float> 
%va) nounwind { -+; CHECK-LABEL: lasx_xvfcvtl_d_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcvtl.d.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> %va) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fdiv.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fdiv.ll -new file mode 100644 -index 000000000000..49923ddd4e8d ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fdiv.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float>, <8 x float>) -+ -+define <8 x float> @lasx_xvfdiv_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfdiv_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfdiv.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double>, <4 x double>) -+ -+define <4 x double> @lasx_xvfdiv_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfdiv_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfdiv.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ffint.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ffint.ll -new file mode 100644 -index 000000000000..24da0bd33838 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ffint.ll -@@ -0,0 +1,86 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32>) -+ -+define <8 x float> @lasx_xvffint_s_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvffint_s_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvffint.s.w $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> %va) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64>) -+ -+define <4 x double> @lasx_xvffint_d_l(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvffint_d_l: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvffint.d.l $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> %va) -+ ret <4 x double> %res -+} -+ -+declare <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32>) -+ -+define <8 x float> @lasx_xvffint_s_wu(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvffint_s_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvffint.s.wu $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> %va) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64>) -+ -+define <4 x double> @lasx_xvffint_d_lu(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvffint_d_lu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvffint.d.lu $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> %va) -+ ret <4 x double> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32>) -+ -+define <4 x double> @lasx_xvffintl_d_w(<8 x 
i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvffintl_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvffintl.d.w $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> %va) -+ ret <4 x double> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32>) -+ -+define <4 x double> @lasx_xvffinth_d_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvffinth_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvffinth.d.w $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> %va) -+ ret <4 x double> %res -+} -+ -+declare <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64>, <4 x i64>) -+ -+define <8 x float> @lasx_xvffint_s_l(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvffint_s_l: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvffint.s.l $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> %va, <4 x i64> %vb) -+ ret <8 x float> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-flogb.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-flogb.ll -new file mode 100644 -index 000000000000..bccef4504d70 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-flogb.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float>) -+ -+define <8 x float> @lasx_xvflogb_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvflogb_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvflogb.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> %va) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double>) -+ -+define <4 x double> @lasx_xvflogb_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvflogb_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvflogb.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> %va) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmadd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmadd.ll -new file mode 100644 -index 000000000000..0fc06f971660 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmadd.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float>, <8 x float>, <8 x float>) -+ -+define <8 x float> @lasx_xvfmadd_s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) nounwind { -+; CHECK-LABEL: lasx_xvfmadd_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double>, <4 x double>, <4 x double>) -+ -+define <4 x double> @lasx_xvfmadd_d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) nounwind { -+; CHECK-LABEL: lasx_xvfmadd_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> %va, <4 x double> %vb, <4 
x double> %vc) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmax.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmax.ll -new file mode 100644 -index 000000000000..2422fa0c00d8 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmax.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float>, <8 x float>) -+ -+define <8 x float> @lasx_xvfmax_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfmax_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfmax.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double>, <4 x double>) -+ -+define <4 x double> @lasx_xvfmax_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfmax_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfmax.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmaxa.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmaxa.ll -new file mode 100644 -index 000000000000..cd9ccc656aef ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmaxa.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float>, <8 x float>) -+ -+define <8 x float> @lasx_xvfmaxa_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfmaxa_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfmaxa.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double>, <4 x double>) -+ -+define <4 x double> @lasx_xvfmaxa_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfmaxa_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfmaxa.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmin.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmin.ll -new file mode 100644 -index 000000000000..effb3f9e1d75 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmin.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float>, <8 x float>) -+ -+define <8 x float> @lasx_xvfmin_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfmin_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfmin.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double>, <4 x double>) -+ -+define <4 x double> @lasx_xvfmin_d(<4 x double> %va, <4 x double> 
%vb) nounwind { -+; CHECK-LABEL: lasx_xvfmin_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfmin.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmina.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmina.ll -new file mode 100644 -index 000000000000..753a6f31ba06 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmina.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float>, <8 x float>) -+ -+define <8 x float> @lasx_xvfmina_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfmina_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfmina.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double>, <4 x double>) -+ -+define <4 x double> @lasx_xvfmina_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfmina_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfmina.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmsub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmsub.ll -new file mode 100644 -index 000000000000..57909d0dd168 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmsub.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float>, <8 x float>, <8 x float>) -+ -+define <8 x float> @lasx_xvfmsub_s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) nounwind { -+; CHECK-LABEL: lasx_xvfmsub_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double>, <4 x double>, <4 x double>) -+ -+define <4 x double> @lasx_xvfmsub_d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) nounwind { -+; CHECK-LABEL: lasx_xvfmsub_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmul.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmul.ll -new file mode 100644 -index 000000000000..9cad6f383066 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmul.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float>, <8 x float>) -+ -+define <8 x float> @lasx_xvfmul_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfmul_s: -+; CHECK: # %bb.0: # 
%entry -+; CHECK-NEXT: xvfmul.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double>, <4 x double>) -+ -+define <4 x double> @lasx_xvfmul_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfmul_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfmul.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmadd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmadd.ll -new file mode 100644 -index 000000000000..c30993590f98 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmadd.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float>, <8 x float>, <8 x float>) -+ -+define <8 x float> @lasx_xvfnmadd_s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) nounwind { -+; CHECK-LABEL: lasx_xvfnmadd_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double>, <4 x double>, <4 x double>) -+ -+define <4 x double> @lasx_xvfnmadd_d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) nounwind { -+; CHECK-LABEL: lasx_xvfnmadd_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmsub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmsub.ll -new file mode 100644 -index 000000000000..2e7ca695be62 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmsub.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float>, <8 x float>, <8 x float>) -+ -+define <8 x float> @lasx_xvfnmsub_s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) nounwind { -+; CHECK-LABEL: lasx_xvfnmsub_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double>, <4 x double>, <4 x double>) -+ -+define <4 x double> @lasx_xvfnmsub_d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) nounwind { -+; CHECK-LABEL: lasx_xvfnmsub_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecip.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecip.ll -new file mode 100644 
-index 000000000000..da3a26df2824 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecip.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float>) -+ -+define <8 x float> @lasx_xvfrecip_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvfrecip_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrecip.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> %va) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double>) -+ -+define <4 x double> @lasx_xvfrecip_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvfrecip_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrecip.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> %va) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frint.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frint.ll -new file mode 100644 -index 000000000000..ddead27cd14b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frint.ll -@@ -0,0 +1,122 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float>) -+ -+define <8 x float> @lasx_xvfrintrne_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvfrintrne_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrintrne.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> %va) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double>) -+ -+define <4 x double> @lasx_xvfrintrne_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvfrintrne_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrintrne.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> %va) -+ ret <4 x double> %res -+} -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float>) -+ -+define <8 x float> @lasx_xvfrintrz_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvfrintrz_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrintrz.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> %va) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double>) -+ -+define <4 x double> @lasx_xvfrintrz_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvfrintrz_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrintrz.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> %va) -+ ret <4 x double> %res -+} -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float>) -+ -+define <8 x float> @lasx_xvfrintrp_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvfrintrp_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrintrp.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> %va) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double>) -+ -+define <4 x double> @lasx_xvfrintrp_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvfrintrp_d: -+; CHECK: # 
%bb.0: # %entry -+; CHECK-NEXT: xvfrintrp.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> %va) -+ ret <4 x double> %res -+} -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float>) -+ -+define <8 x float> @lasx_xvfrintrm_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvfrintrm_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrintrm.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> %va) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double>) -+ -+define <4 x double> @lasx_xvfrintrm_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvfrintrm_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrintrm.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> %va) -+ ret <4 x double> %res -+} -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float>) -+ -+define <8 x float> @lasx_xvfrint_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvfrint_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrint.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> %va) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double>) -+ -+define <4 x double> @lasx_xvfrint_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvfrint_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrint.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> %va) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrt.ll -new file mode 100644 -index 000000000000..6efa8122baf1 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrt.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float>) -+ -+define <8 x float> @lasx_xvfrsqrt_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvfrsqrt_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrsqrt.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> %va) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double>) -+ -+define <4 x double> @lasx_xvfrsqrt_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvfrsqrt_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrsqrt.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> %va) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp.ll -new file mode 100644 -index 000000000000..e83e55a52a11 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8>, <32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvfrstp_b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { -+; CHECK-LABEL: lasx_xvfrstp_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: 
xvfrstp.b $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16>, <16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvfrstp_h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { -+; CHECK-LABEL: lasx_xvfrstp_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrstp.h $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) -+ ret <16 x i16> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvfrstpi_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfrstpi_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrstpi.b $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> %va, <32 x i8> %vb, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvfrstpi_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfrstpi_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrstpi.h $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> %va, <16 x i16> %vb, i32 1) -+ ret <16 x i16> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsqrt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsqrt.ll -new file mode 100644 -index 000000000000..a13333d8d81c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsqrt.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float>) -+ -+define <8 x float> @lasx_xvfsqrt_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvfsqrt_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfsqrt.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> %va) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double>) -+ -+define <4 x double> @lasx_xvfsqrt_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvfsqrt_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfsqrt.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> %va) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsub.ll -new file mode 100644 -index 000000000000..b52774a03618 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsub.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float>, <8 x float>) -+ -+define <8 x float> @lasx_xvfsub_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfsub_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfsub.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double>, <4 x double>) -+ 
-+define <4 x double> @lasx_xvfsub_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfsub_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfsub.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ftint.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ftint.ll -new file mode 100644 -index 000000000000..74cd507f16d2 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ftint.ll -@@ -0,0 +1,350 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float>) -+ -+define <8 x i32> @lasx_xvftintrne_w_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrne_w_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrne.w.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double>) -+ -+define <4 x i64> @lasx_xvftintrne_l_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrne_l_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrne.l.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> %va) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float>) -+ -+define <8 x i32> @lasx_xvftintrz_w_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrz_w_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrz.w.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double>) -+ -+define <4 x i64> @lasx_xvftintrz_l_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrz_l_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrz.l.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> %va) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float>) -+ -+define <8 x i32> @lasx_xvftintrp_w_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrp_w_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrp.w.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double>) -+ -+define <4 x i64> @lasx_xvftintrp_l_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrp_l_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrp.l.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> %va) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float>) -+ -+define <8 x i32> @lasx_xvftintrm_w_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrm_w_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrm.w.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double>) -+ -+define <4 x i64> @lasx_xvftintrm_l_d(<4 x 
double> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrm_l_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrm.l.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> %va) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float>) -+ -+define <8 x i32> @lasx_xvftint_w_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvftint_w_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftint.w.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double>) -+ -+define <4 x i64> @lasx_xvftint_l_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvftint_l_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftint.l.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> %va) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float>) -+ -+define <8 x i32> @lasx_xvftintrz_wu_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrz_wu_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrz.wu.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double>) -+ -+define <4 x i64> @lasx_xvftintrz_lu_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrz_lu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrz.lu.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> %va) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float>) -+ -+define <8 x i32> @lasx_xvftint_wu_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvftint_wu_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftint.wu.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double>) -+ -+define <4 x i64> @lasx_xvftint_lu_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvftint_lu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftint.lu.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> %va) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double>, <4 x double>) -+ -+define <8 x i32> @lasx_xvftintrne_w_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvftintrne_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrne.w.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> %va, <4 x double> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double>, <4 x double>) -+ -+define <8 x i32> @lasx_xvftintrz_w_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvftintrz_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrz.w.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> %va, <4 x double> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double>, <4 x double>) -+ -+define <8 x i32> @lasx_xvftintrp_w_d(<4 x 
double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvftintrp_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrp.w.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> %va, <4 x double> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double>, <4 x double>) -+ -+define <8 x i32> @lasx_xvftintrm_w_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvftintrm_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrm.w.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> %va, <4 x double> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double>, <4 x double>) -+ -+define <8 x i32> @lasx_xvftint_w_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvftint_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftint.w.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> %va, <4 x double> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float>) -+ -+define <4 x i64> @lasx_xvftintrnel_l_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrnel_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrnel.l.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> %va) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float>) -+ -+define <4 x i64> @lasx_xvftintrneh_l_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrneh_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrneh.l.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> %va) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float>) -+ -+define <4 x i64> @lasx_xvftintrzl_l_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrzl_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrzl.l.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> %va) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float>) -+ -+define <4 x i64> @lasx_xvftintrzh_l_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrzh_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrzh.l.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> %va) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float>) -+ -+define <4 x i64> @lasx_xvftintrpl_l_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrpl_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrpl.l.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> %va) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float>) -+ -+define <4 x i64> @lasx_xvftintrph_l_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrph_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrph.l.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> %va) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> 
@llvm.loongarch.lasx.xvftintrml.l.s(<8 x float>) -+ -+define <4 x i64> @lasx_xvftintrml_l_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrml_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrml.l.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> %va) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float>) -+ -+define <4 x i64> @lasx_xvftintrmh_l_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrmh_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrmh.l.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> %va) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float>) -+ -+define <4 x i64> @lasx_xvftintl_l_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintl_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintl.l.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> %va) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float>) -+ -+define <4 x i64> @lasx_xvftinth_l_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvftinth_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftinth.l.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> %va) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-haddw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-haddw.ll -new file mode 100644 -index 000000000000..2c64ab23806b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-haddw.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvhaddw_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvhaddw_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvhaddw.h.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvhaddw_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvhaddw_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvhaddw.w.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvhaddw_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvhaddw_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvhaddw.d.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvhaddw_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvhaddw_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvhaddw.q.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x 
i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvhaddw_hu_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvhaddw_hu_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvhaddw.hu.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvhaddw_wu_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvhaddw_wu_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvhaddw.wu.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvhaddw_du_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvhaddw_du_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvhaddw.du.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvhaddw_qu_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvhaddw_qu_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvhaddw.qu.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-hsubw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-hsubw.ll -new file mode 100644 -index 000000000000..a5223c1d89a0 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-hsubw.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvhsubw_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvhsubw_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvhsubw.h.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvhsubw_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvhsubw_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvhsubw.w.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvhsubw_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvhsubw_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvhsubw.d.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvhsubw_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: 
lasx_xvhsubw_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvhsubw.q.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvhsubw_hu_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvhsubw_hu_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvhsubw.hu.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvhsubw_wu_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvhsubw_wu_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvhsubw.wu.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvhsubw_du_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvhsubw_du_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvhsubw.du.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvhsubw_qu_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvhsubw_qu_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvhsubw.qu.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ilv.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ilv.ll -new file mode 100644 -index 000000000000..c9d0ca6b0324 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ilv.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvilvl_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvilvl_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvilvl.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvilvl_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvilvl_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvilvl.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvilvl_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvilvl_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvilvl.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 
x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvilvl_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvilvl_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvilvl.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvilvh_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvilvh_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvilvh.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvilvh_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvilvh_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvilvh.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvilvh_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvilvh_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvilvh.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvilvh_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvilvh_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvilvh.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr.ll -new file mode 100644 -index 000000000000..ea98c96464ae ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr.ll -@@ -0,0 +1,28 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32>, i32, i32) -+ -+define <8 x i32> @lasx_xvinsgr2vr_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvinsgr2vr_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: ori $a0, $zero, 1 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> %va, i32 1, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64>, i64, i32) -+ -+define <4 x i64> @lasx_xvinsgr2vr_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvinsgr2vr_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: ori $a0, $zero, 1 -+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> %va, i64 1, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0.ll -new file mode 100644 -index 000000000000..27ae819c4144 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have 
been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvinsve0_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvinsve0_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvinsve0.w $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> %va, <8 x i32> %vb, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvinsve0_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvinsve0_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvinsve0.d $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> %va, <4 x i64> %vb, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld.ll -new file mode 100644 -index 000000000000..5ffc629db466 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvld(i8*, i32) -+ -+define <32 x i8> @lasx_xvld(i8* %p) nounwind { -+; CHECK-LABEL: lasx_xvld: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvld(i8* %p, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvldx(i8*, i64) -+ -+define <32 x i8> @lasx_xvldx(i8* %p, i64 %b) nounwind { -+; CHECK-LABEL: lasx_xvldx: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvldx $xr0, $a0, $a1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvldx(i8* %p, i64 %b) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi.ll -new file mode 100644 -index 000000000000..59f79dd32af3 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi.ll -@@ -0,0 +1,62 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvldi(i32) -+ -+define <4 x i64> @lasx_xvldi() nounwind { -+; CHECK-LABEL: lasx_xvldi: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvldi $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32) -+ -+define <32 x i8> @lasx_xvrepli_b() nounwind { -+; CHECK-LABEL: lasx_xvrepli_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrepli.b $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32) -+ -+define <16 x i16> @lasx_xvrepli_h() nounwind { -+; CHECK-LABEL: lasx_xvrepli_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrepli.h $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32) -+ -+define <8 x i32> @lasx_xvrepli_w() nounwind { -+; CHECK-LABEL: lasx_xvrepli_w: -+; CHECK: # %bb.0: # 
%entry -+; CHECK-NEXT: xvrepli.w $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32) -+ -+define <4 x i64> @lasx_xvrepli_d() nounwind { -+; CHECK-LABEL: lasx_xvrepli_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrepli.d $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl.ll -new file mode 100644 -index 000000000000..ae6abdf81cbc ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8*, i32) -+ -+define <32 x i8> @lasx_xvldrepl_b(i8* %p) nounwind { -+; CHECK-LABEL: lasx_xvldrepl_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvldrepl.b $xr0, $a0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8* %p, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8*, i32) -+ -+define <16 x i16> @lasx_xvldrepl_h(i8* %p) nounwind { -+; CHECK-LABEL: lasx_xvldrepl_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvldrepl.h $xr0, $a0, 2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8* %p, i32 2) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8*, i32) -+ -+define <8 x i32> @lasx_xvldrepl_w(i8* %p) nounwind { -+; CHECK-LABEL: lasx_xvldrepl_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvldrepl.w $xr0, $a0, 4 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8* %p, i32 4) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8*, i32) -+ -+define <4 x i64> @lasx_xvldrepl_d(i8* %p) nounwind { -+; CHECK-LABEL: lasx_xvldrepl_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvldrepl.d $xr0, $a0, 8 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8* %p, i32 8) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-madd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-madd.ll -new file mode 100644 -index 000000000000..d3b09396727e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-madd.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8>, <32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvmadd_b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmadd_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmadd.b $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16>, <16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvmadd_h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmadd_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmadd.h $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x 
i16> %va, <16 x i16> %vb, <16 x i16> %vc) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32>, <8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvmadd_w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmadd_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmadd.w $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64>, <4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmadd_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmadd_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmadd.d $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-maddw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-maddw.ll -new file mode 100644 -index 000000000000..146624a764a2 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-maddw.ll -@@ -0,0 +1,290 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16>, <32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvmaddwev_h_b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwev_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwev.h.b $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32>, <16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvmaddwev_w_h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwev_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwev.w.h $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64>, <8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvmaddwev_d_w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwev_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwev.d.w $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64>, <4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmaddwev_q_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwev_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwev.q.d $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16>, <32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvmaddwev_h_bu(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwev_h_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwev.h.bu $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret 
-+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32>, <16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvmaddwev_w_hu(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwev_w_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwev.w.hu $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64>, <8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvmaddwev_d_wu(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwev_d_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwev.d.wu $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64>, <4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmaddwev_q_du(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwev_q_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwev.q.du $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16>, <32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvmaddwev_h_bu_b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwev_h_bu_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwev.h.bu.b $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32>, <16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvmaddwev_w_hu_h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwev_w_hu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwev.w.hu.h $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64>, <8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvmaddwev_d_wu_w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwev_d_wu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwev.d.wu.w $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64>, <4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmaddwev_q_du_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwev_q_du_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwev.q.du.d $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16>, <32 x i8>, 
<32 x i8>) -+ -+define <16 x i16> @lasx_xvmaddwod_h_b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwod_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwod.h.b $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32>, <16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvmaddwod_w_h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwod_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwod.w.h $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64>, <8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvmaddwod_d_w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwod_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwod.d.w $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64>, <4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmaddwod_q_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwod_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwod.q.d $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16>, <32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvmaddwod_h_bu(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwod_h_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwod.h.bu $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32>, <16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvmaddwod_w_hu(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwod_w_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwod.w.hu $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64>, <8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvmaddwod_d_wu(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwod_d_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwod.d.wu $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64>, <4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmaddwod_q_du(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwod_q_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwod.q.du $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> 
@llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16>, <32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvmaddwod_h_bu_b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwod_h_bu_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwod.h.bu.b $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32>, <16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvmaddwod_w_hu_h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwod_w_hu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwod.w.hu.h $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64>, <8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvmaddwod_d_wu_w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwod_d_wu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwod.d.wu.w $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64>, <4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmaddwod_q_du_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwod_q_du_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwod.q.du.d $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max.ll -new file mode 100644 -index 000000000000..9cf09df4439a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max.ll -@@ -0,0 +1,194 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvmax_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmax_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmax.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvmax_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmax_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmax.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvmax_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmax_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmax.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> 
@llvm.loongarch.lasx.xvmax.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmax_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmax_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmax.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvmaxi_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvmaxi_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaxi.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvmaxi_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvmaxi_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaxi.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvmaxi_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvmaxi_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaxi.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvmaxi_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvmaxi_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaxi.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_vmax_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_vmax_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmax.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvmax_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmax_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmax.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvmax_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmax_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmax.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmax_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmax_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmax.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> 
@llvm.loongarch.lasx.xvmaxi.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvmaxi_bu(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvmaxi_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaxi.bu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvmaxi_hu(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvmaxi_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaxi.hu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvmaxi_wu(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvmaxi_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaxi.wu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvmaxi_du(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvmaxi_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaxi.du $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min.ll -new file mode 100644 -index 000000000000..c94b1e4ea44c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min.ll -@@ -0,0 +1,194 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvmin_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmin_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmin.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvmin_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmin_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmin.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvmin_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmin_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmin.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmin_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmin_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmin.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8>, i32) -+ -+define <32 x i8> 
@lasx_xvmini_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvmini_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmini.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvmini_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvmini_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmini.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvmini_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvmini_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmini.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvmini_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvmini_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmini.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvmin_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmin_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmin.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvmin_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmin_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmin.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvmin_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmin_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmin.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmin_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmin_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmin.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvmini_bu(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvmini_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmini.bu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvmini_hu(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvmini_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: 
xvmini.hu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvmini_wu(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvmini_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmini.wu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvmini_du(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvmini_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmini.du $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mod.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mod.ll -new file mode 100644 -index 000000000000..a177246bb235 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mod.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvmod_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmod_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmod.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvmod_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmod_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmod.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvmod_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmod_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmod.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmod_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmod_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmod.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvmod_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmod_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmod.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvmod_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmod_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmod.hu $xr0, $xr0, $xr1 -+; 
CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvmod_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmod_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmod.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmod_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmod_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmod.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskgez.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskgez.ll -new file mode 100644 -index 000000000000..da87c20ad6ee ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskgez.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8>) -+ -+define <32 x i8> @lasx_xvmskgez_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvmskgez_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmskgez.b $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> %va) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskltz.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskltz.ll -new file mode 100644 -index 000000000000..b2218487535c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskltz.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8>) -+ -+define <32 x i8> @lasx_xvmskltz_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvmskltz_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmskltz.b $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> %va) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16>) -+ -+define <16 x i16> @lasx_xvmskltz_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvmskltz_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmskltz.h $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> %va) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32>) -+ -+define <8 x i32> @lasx_xvmskltz_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvmskltz_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmskltz.w $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64>) -+ -+define <4 x i64> @lasx_xvmskltz_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvmskltz_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmskltz.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> %va) -+ ret 
<4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msknz.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msknz.ll -new file mode 100644 -index 000000000000..becd2c883a7e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msknz.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8>) -+ -+define <32 x i8> @lasx_xvmsknz_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvmsknz_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmsknz.b $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> %va) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msub.ll -new file mode 100644 -index 000000000000..c89f9578b77d ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msub.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8>, <32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvmsub_b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmsub_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmsub.b $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16>, <16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvmsub_h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmsub_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmsub.h $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32>, <8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvmsub_w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmsub_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmsub.w $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64>, <4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmsub_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmsub_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmsub.d $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-muh.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-muh.ll -new file mode 100644 -index 000000000000..97461512ce16 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-muh.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvmuh_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: 
lasx_xvmuh_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmuh.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvmuh_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmuh_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmuh.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvmuh_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmuh_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmuh.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmuh_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmuh_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmuh.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvmuh_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmuh_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmuh.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvmuh_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmuh_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmuh.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvmuh_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmuh_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmuh.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmuh_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmuh_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmuh.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mul.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mul.ll -new file mode 100644 -index 000000000000..d5d852e58a9f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mul.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvmul_b(<32 x i8> %va, <32 x i8> %vb) 
nounwind { -+; CHECK-LABEL: lasx_xvmul_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmul.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvmul_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmul_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmul.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvmul_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmul_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmul.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmul_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmul_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmul.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mulw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mulw.ll -new file mode 100644 -index 000000000000..f69e64aa7698 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mulw.ll -@@ -0,0 +1,290 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvmulwev_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwev_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwev.h.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvmulwev_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwev_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwev.w.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvmulwev_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwev_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwev.d.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmulwev_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwev_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwev.q.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> 
@llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvmulwev_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwev_h_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwev.h.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvmulwev_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwev_w_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwev.w.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvmulwev_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwev_d_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwev.d.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmulwev_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwev_q_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwev.q.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvmulwev_h_bu_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwev_h_bu_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwev.h.bu.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvmulwev_w_hu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwev_w_hu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwev.w.hu.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvmulwev_d_wu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwev_d_wu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwev.d.wu.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmulwev_q_du_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwev_q_du_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwev.q.du.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvmulwod_h_b(<32 x i8> %va, <32 x i8> 
%vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwod_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwod.h.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvmulwod_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwod_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwod.w.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvmulwod_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwod_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwod.d.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmulwod_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwod_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwod.q.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvmulwod_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwod_h_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwod.h.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvmulwod_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwod_w_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwod.w.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvmulwod_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwod_d_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwod.d.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmulwod_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwod_q_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwod.q.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvmulwod_h_bu_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwod_h_bu_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwod.h.bu.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 
x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvmulwod_w_hu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwod_w_hu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwod.w.hu.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvmulwod_d_wu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwod_d_wu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwod.d.wu.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmulwod_q_du_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwod_q_du_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwod.q.du.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-neg.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-neg.ll -new file mode 100644 -index 000000000000..ecbedf334657 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-neg.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8>) -+ -+define <32 x i8> @lasx_xvneg_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvneg_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvneg.b $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> %va) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16>) -+ -+define <16 x i16> @lasx_xvneg_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvneg_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvneg.h $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> %va) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32>) -+ -+define <8 x i32> @lasx_xvneg_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvneg_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvneg.w $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64>) -+ -+define <4 x i64> @lasx_xvneg_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvneg_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvneg.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> %va) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nor.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nor.ll -new file mode 100644 -index 000000000000..674746b7624e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nor.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc 
--mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvnor_v(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvnor_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori.ll -new file mode 100644 -index 000000000000..55eebf87ee92 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvnori_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvnori_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvnori.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-or.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-or.ll -new file mode 100644 -index 000000000000..16462cfafc54 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-or.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvor_v(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvor_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori.ll -new file mode 100644 -index 000000000000..8e53d88bac37 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvori_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvori_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvori.b $xr0, $xr0, 3 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> %va, i32 3) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-orn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-orn.ll -new file mode 100644 -index 000000000000..3a335cdd3716 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-orn.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvorn_v(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvorn_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvorn.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> %va, <32 
x i8> %vb) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pack.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pack.ll -new file mode 100644 -index 000000000000..512b30234917 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pack.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvpackev_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpackev_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpackev.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvpackev_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpackev_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpackev.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvpackev_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpackev_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpackev.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvpackev_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpackev_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpackev.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvpackod_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpackod_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpackod.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvpackod_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpackod_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpackod.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvpackod_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpackod_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpackod.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvpackod_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpackod_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: 
xvpackod.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pcnt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pcnt.ll -new file mode 100644 -index 000000000000..d77f1d2082c8 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pcnt.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8>) -+ -+define <32 x i8> @lasx_xvpcnt_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvpcnt_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpcnt.b $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> %va) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16>) -+ -+define <16 x i16> @lasx_xvpcnt_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvpcnt_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpcnt.h $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> %va) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32>) -+ -+define <8 x i32> @lasx_xvpcnt_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvpcnt_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpcnt.w $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64>) -+ -+define <4 x i64> @lasx_xvpcnt_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvpcnt_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpcnt.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> %va) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-perm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-perm.ll -new file mode 100644 -index 000000000000..4ec434edd4ec ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-perm.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvperm_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvperm_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvperm.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi.ll -new file mode 100644 -index 000000000000..0d9f9daabc44 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi.ll -@@ -0,0 +1,38 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvpermi_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpermi_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpermi.w $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> 
@llvm.loongarch.lasx.xvpermi.w(<8 x i32> %va, <8 x i32> %vb, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvpermi_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvpermi_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpermi.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvpermi_q(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpermi_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> %va, <32 x i8> %vb, i32 1) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pick.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pick.ll -new file mode 100644 -index 000000000000..bbd6d693ca0b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pick.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvpickev_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpickev_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpickev.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvpickev_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpickev_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpickev.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvpickev_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpickev_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpickev.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvpickev_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpickev_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpickev.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvpickod_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpickod_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpickod.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvpickod_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpickod_h: -+; CHECK: # %bb.0: # %entry 
-+; CHECK-NEXT: xvpickod.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvpickod_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpickod_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpickod.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvpickod_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpickod_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpickod.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve.ll -new file mode 100644 -index 000000000000..546777bc72ab ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvpickve_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvpickve_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpickve.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvpickve_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvpickve_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpickve.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -+ -+declare <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float>, i32) -+ -+define <8 x float> @lasx_xvpickve_w_f(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvpickve_w_f: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpickve.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> %va, i32 1) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double>, i32) -+ -+define <4 x double> @lasx_xvpickve_d_f(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvpickve_d_f: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpickve.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> %va, i32 1) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr.ll -new file mode 100644 -index 000000000000..0617e7424321 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr.ll -@@ -0,0 +1,53 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+ -+ -+ -+declare i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32>, i32) -+ -+define i32 @lasx_xvpickve2gr_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: 
lasx_xvpickve2gr_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> %va, i32 1) -+ ret i32 %res -+} -+ -+declare i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64>, i32) -+ -+define i64 @lasx_xvpickve2gr_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvpickve2gr_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> %va, i32 1) -+ ret i64 %res -+} -+ -+declare i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32>, i32) -+ -+define i32 @lasx_xvpickve2gr_wu(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvpickve2gr_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> %va, i32 1) -+ ret i32 %res -+} -+ -+declare i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64>, i32) -+ -+define i64 @lasx_xvpickve2gr_du(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvpickve2gr_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpickve2gr.du $a0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> %va, i32 1) -+ ret i64 %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei.ll -new file mode 100644 -index 000000000000..25fab44f461f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvrepl128vei_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvrepl128vei_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrepl128vei.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvrepl128vei_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvrepl128vei_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrepl128vei.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvrepl128vei_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvrepl128vei_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrepl128vei.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvrepl128vei_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvrepl128vei_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrepl128vei.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replgr2vr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replgr2vr.ll -new file mode 100644 -index 000000000000..c71abd2205c6 ---- /dev/null -+++ 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replgr2vr.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32) -+ -+define <32 x i8> @lasx_xvreplgr2vr_b(i32 %a) nounwind { -+; CHECK-LABEL: lasx_xvreplgr2vr_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvreplgr2vr.b $xr0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 %a) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32) -+ -+define <16 x i16> @lasx_xvreplgr2vr_h(i32 %a) nounwind { -+; CHECK-LABEL: lasx_xvreplgr2vr_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvreplgr2vr.h $xr0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 %a) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32) -+ -+define <8 x i32> @lasx_xvreplgr2vr_w(i32 %a) nounwind { -+; CHECK-LABEL: lasx_xvreplgr2vr_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvreplgr2vr.w $xr0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 %a) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64) -+ -+define <4 x i64> @lasx_xvreplgr2vr_d(i64 %a) nounwind { -+; CHECK-LABEL: lasx_xvreplgr2vr_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvreplgr2vr.d $xr0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 %a) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve.ll -new file mode 100644 -index 000000000000..21d36ff7bb5e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvreplve_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK-LABEL: lasx_xvreplve_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvreplve.b $xr0, $xr0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvreplve_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK-LABEL: lasx_xvreplve_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvreplve.h $xr0, $xr0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvreplve_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK-LABEL: lasx_xvreplve_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvreplve_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK-LABEL: lasx_xvreplve_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> 
@llvm.loongarch.lasx.xvreplve.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve0.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve0.ll -new file mode 100644 -index 000000000000..7996bb36ef03 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve0.ll -@@ -0,0 +1,62 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8>) -+ -+define <32 x i8> @lasx_xvreplve0_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvreplve0_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvreplve0.b $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> %va) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16>) -+ -+define <16 x i16> @lasx_xvreplve0_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvreplve0_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvreplve0.h $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> %va) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32>) -+ -+define <8 x i32> @lasx_xvreplve0_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvreplve0_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvreplve0.w $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64>) -+ -+define <4 x i64> @lasx_xvreplve0_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvreplve0_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvreplve0.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> %va) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8>) -+ -+define <32 x i8> @lasx_xvreplve0_q(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvreplve0_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvreplve0.q $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> %va) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr.ll -new file mode 100644 -index 000000000000..64d2773864e9 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvrotr_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvrotr_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrotr.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvrotr_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvrotr_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrotr.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ 
-+declare <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvrotr_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvrotr_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrotr.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvrotr_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvrotr_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrotr.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvrotri_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvrotri_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrotri.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvrotri_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvrotri_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrotri.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvrotri_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvrotri_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrotri.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvrotri_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvrotri_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrotri.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sadd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sadd.ll -new file mode 100644 -index 000000000000..54a5e2e9c833 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sadd.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvsadd_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsadd_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsadd.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvsadd_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsadd_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsadd.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32>, <8 
x i32>) -+ -+define <8 x i32> @lasx_xvsadd_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsadd_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsadd.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvsadd_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsadd_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsadd.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvsadd_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsadd_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsadd.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvsadd_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsadd_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsadd.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvsadd_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsadd_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsadd.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvsadd_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsadd_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsadd.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat.ll -new file mode 100644 -index 000000000000..293b9dc9eb4d ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsat_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvsat_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsat.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsat_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvsat_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsat.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32>, i32) -+ -+define 
<8 x i32> @lasx_xvsat_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvsat_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsat.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsat_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvsat_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsat.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsat_bu(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvsat_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsat.bu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsat_hu(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvsat_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsat.hu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsat_wu(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvsat_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsat.wu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsat_du(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvsat_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsat.du $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq.ll -new file mode 100644 -index 000000000000..83bc93c88c73 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvseq_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvseq_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvseq.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvseq_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvseq_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvseq.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvseq_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvseq_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvseq.w $xr0, $xr0, 
$xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvseq_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvseq_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvseq.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvseqi_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvseqi_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvseqi.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvseqi_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvseqi_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvseqi.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvseqi_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvseqi_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvseqi.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvseqi_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvseqi_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvseqi.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-set.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-set.ll -new file mode 100644 -index 000000000000..6e3e2e0330f5 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-set.ll -@@ -0,0 +1,38 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare i32 @llvm.loongarch.lasx.xbz.v(<32 x i8>) -+ -+define i32 @lasx_xbz_v(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xbz_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvseteqz.v $fcc0, $xr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB0_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB0_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> %va) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8>) -+ -+define i32 @lasx_xbnz_v(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xbnz_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsetnez.v $fcc0, $xr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB1_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB1_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> %va) -+ ret i32 %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setallnez.ll 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setallnez.ll -new file mode 100644 -index 000000000000..a466b78bf8d2 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setallnez.ll -@@ -0,0 +1,74 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8>) -+ -+define i32 @lasx_xbnz_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xbnz_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsetallnez.b $fcc0, $xr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB0_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB0_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> %va) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16>) -+ -+define i32 @lasx_xbnz_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xbnz_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsetallnez.h $fcc0, $xr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB1_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB1_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> %va) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32>) -+ -+define i32 @lasx_xbnz_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xbnz_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsetallnez.w $fcc0, $xr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB2_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB2_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> %va) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64>) -+ -+define i32 @lasx_xbnz_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xbnz_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsetallnez.d $fcc0, $xr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB3_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB3_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> %va) -+ ret i32 %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setanyeqz.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setanyeqz.ll -new file mode 100644 -index 000000000000..36e65fc5b328 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setanyeqz.ll -@@ -0,0 +1,74 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare i32 @llvm.loongarch.lasx.xbz.b(<32 x i8>) -+ -+define i32 @lasx_xbz_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xbz_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsetanyeqz.b $fcc0, $xr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB0_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB0_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> %va) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lasx.xbz.h(<16 x i16>) -+ -+define i32 @lasx_xbz_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xbz_h: -+; CHECK: # 
%bb.0: # %entry -+; CHECK-NEXT: xvsetanyeqz.h $fcc0, $xr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB1_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB1_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> %va) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lasx.xbz.w(<8 x i32>) -+ -+define i32 @lasx_xbz_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xbz_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsetanyeqz.w $fcc0, $xr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB2_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB2_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> %va) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lasx.xbz.d(<4 x i64>) -+ -+define i32 @lasx_xbz_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xbz_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsetanyeqz.d $fcc0, $xr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB3_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB3_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> %va) -+ ret i32 %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf.ll -new file mode 100644 -index 000000000000..9b9140f6ad62 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8>, <32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvshuf_b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { -+; CHECK-LABEL: lasx_xvshuf_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvshuf.b $xr0, $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16>, <16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvshuf_h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { -+; CHECK-LABEL: lasx_xvshuf_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvshuf.h $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32>, <8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvshuf_w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { -+; CHECK-LABEL: lasx_xvshuf_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvshuf.w $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64>, <4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvshuf_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { -+; CHECK-LABEL: lasx_xvshuf_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvshuf.d $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> 
%vc) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i.ll -new file mode 100644 -index 000000000000..31205086759c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvshuf4i_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvshuf4i_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvshuf4i.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvshuf4i_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvshuf4i_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvshuf4i.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvshuf4i_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvshuf4i_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvshuf4i.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvshuf4i_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvshuf4i_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvshuf4i.d $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> %va, <4 x i64> %vb, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-signcov.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-signcov.ll -new file mode 100644 -index 000000000000..e6c6d8ccd0d3 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-signcov.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvsigncov_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsigncov_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsigncov.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvsigncov_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsigncov_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsigncov.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvsigncov_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsigncov_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsigncov.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res 
= call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvsigncov_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsigncov_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsigncov.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle.ll -new file mode 100644 -index 000000000000..8895efc84b84 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle.ll -@@ -0,0 +1,194 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvsle_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsle_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsle.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvsle_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsle_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsle.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvsle_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsle_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsle.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvsle_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsle_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsle.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvslei_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvslei_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslei.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvslei_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvslei_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslei.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvslei_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvslei_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslei.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> 
@llvm.loongarch.lasx.xvslei.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvslei_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvslei_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslei.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvsle_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsle_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsle.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvsle_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsle_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsle.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvsle_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsle_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsle.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvsle_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsle_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsle.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvslei_bu(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvslei_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslei.bu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvslei_hu(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvslei_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslei.hu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvslei_wu(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvslei_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslei.wu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvslei_du(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvslei_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslei.du $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git 
a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll.ll -new file mode 100644 -index 000000000000..14110b613dbe ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvsll_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsll_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsll.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvsll_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsll_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsll.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvsll_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsll_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsll.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvsll_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsll_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsll.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvslli_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvslli_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslli.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvslli_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvslli_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslli.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvslli_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvslli_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslli.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvslli_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvslli_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslli.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil.ll 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil.ll -new file mode 100644 -index 000000000000..a72b8a6cbb4f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil.ll -@@ -0,0 +1,74 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8>, i32) -+ -+define <16 x i16> @lasx_xvsllwil_h_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvsllwil_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsllwil.h.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16>, i32) -+ -+define <8 x i32> @lasx_xvsllwil_w_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvsllwil_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsllwil.w.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32>, i32) -+ -+define <4 x i64> @lasx_xvsllwil_d_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvsllwil_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsllwil.d.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> %va, i32 1) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8>, i32) -+ -+define <16 x i16> @lasx_xvsllwil_hu_bu(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvsllwil_hu_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsllwil.hu.bu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16>, i32) -+ -+define <8 x i32> @lasx_xvsllwil_wu_hu(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvsllwil_wu_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsllwil.wu.hu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32>, i32) -+ -+define <4 x i64> @lasx_xvsllwil_du_wu(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvsllwil_du_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsllwil.du.wu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt.ll -new file mode 100644 -index 000000000000..3ea87adff110 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt.ll -@@ -0,0 +1,194 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvslt_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvslt_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslt.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> 
@llvm.loongarch.lasx.xvslt.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvslt_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvslt_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslt.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvslt_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvslt_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslt.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvslt_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvslt_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslt.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvslti_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvslti_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslti.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvslti_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvslti_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslti.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvslti_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvslti_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslti.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvslti_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvslti_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslti.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvslt_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvslt_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslt.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvslt_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvslt_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslt.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvslt_wu(<8 x i32> %va, <8 
x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvslt_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslt.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvslt_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvslt_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslt.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvslti_bu(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvslti_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslti.bu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvslti_hu(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvslti_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslti.hu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvslti_wu(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvslti_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslti.wu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvslti_du(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvslti_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslti.du $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra.ll -new file mode 100644 -index 000000000000..a7498682559b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvsra_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsra_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsra.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvsra_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsra_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsra.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvsra_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsra_w: -+; CHECK: # %bb.0: # 
%entry -+; CHECK-NEXT: xvsra.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvsra_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsra_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsra.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrai_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvsrai_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrai.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrai_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvsrai_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrai.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrai_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvsrai_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrai.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrai_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvsrai_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrai.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sran.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sran.ll -new file mode 100644 -index 000000000000..f59ae4c19662 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sran.ll -@@ -0,0 +1,38 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16>, <16 x i16>) -+ -+define <32 x i8> @lasx_xvsran_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsran_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsran.b.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32>, <8 x i32>) -+ -+define <16 x i16> @lasx_xvsran_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsran_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsran.h.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64>, <4 x i64>) -+ -+define <8 x i32> @lasx_xvsran_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsran_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsran.w.d $xr0, $xr0, $xr1 -+; 
CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <8 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani.ll -new file mode 100644 -index 000000000000..91fb90da9c52 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrani_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrani_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrani.b.h $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrani_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrani_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrani.h.w $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrani_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrani_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrani.w.d $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrani_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrani_d_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrani.d.q $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar.ll -new file mode 100644 -index 000000000000..e2c160557c4d ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvsrar_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrar_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrar.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvsrar_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrar_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrar.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32>, <8 x i32>) -+ -+define <8 x 
i32> @lasx_xvsrar_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrar_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrar.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvsrar_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrar_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrar.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrari_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvsrari_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrari.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrari_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvsrari_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrari.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrari_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvsrari_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrari.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrari_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvsrari_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrari.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarn.ll -new file mode 100644 -index 000000000000..02dd989773ca ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarn.ll -@@ -0,0 +1,38 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16>, <16 x i16>) -+ -+define <32 x i8> @lasx_xvsrarn_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrarn_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrarn.b.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32>, <8 x i32>) -+ -+define <16 x i16> @lasx_xvsrarn_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrarn_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrarn.h.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64>, <4 x i64>) -+ -+define <8 x i32> @lasx_xvsrarn_w_d(<4 x 
i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrarn_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrarn.w.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <8 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni.ll -new file mode 100644 -index 000000000000..a7d2c3739793 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrarni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrarni_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrarni.b.h $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrarni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrarni_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrarni.h.w $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrarni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrarni_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrarni.w.d $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrarni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrarni_d_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrarni.d.q $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl.ll -new file mode 100644 -index 000000000000..7b2992f2ca3b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvsrl_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrl_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrl.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvsrl_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrl_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrl.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> 
@llvm.loongarch.lasx.xvsrl.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvsrl_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrl_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrl.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvsrl_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrl_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrl.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrli_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvsrli_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrli.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrli_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvsrli_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrli.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrli_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvsrli_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrli.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrli_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvsrli_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrli.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srln.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srln.ll -new file mode 100644 -index 000000000000..dc5c0e016ea0 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srln.ll -@@ -0,0 +1,38 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16>, <16 x i16>) -+ -+define <32 x i8> @lasx_xvsrln_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrln_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrln.b.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32>, <8 x i32>) -+ -+define <16 x i16> @lasx_xvsrln_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrln_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrln.h.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <16 x 
i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64>, <4 x i64>) -+ -+define <8 x i32> @lasx_xvsrln_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrln_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrln.w.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <8 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni.ll -new file mode 100644 -index 000000000000..0301ebb195e2 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrlni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrlni_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlni.b.h $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrlni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrlni_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlni.h.w $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrlni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrlni_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlni.w.d $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrlni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrlni_d_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlni.d.q $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr.ll -new file mode 100644 -index 000000000000..e04504158e27 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvsrlr_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrlr_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlr.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvsrlr_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrlr_h: -+; CHECK: # %bb.0: # 
%entry -+; CHECK-NEXT: xvsrlr.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvsrlr_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrlr_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlr.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvsrlr_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrlr_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlr.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrlri_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvsrlri_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlri.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrlri_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvsrlri_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlri.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrlri_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvsrlri_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlri.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrlri_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvsrlri_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlri.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrn.ll -new file mode 100644 -index 000000000000..1e7df379c6e1 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrn.ll -@@ -0,0 +1,38 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16>, <16 x i16>) -+ -+define <32 x i8> @lasx_xvsrlrn_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrlrn_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlrn.b.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32>, <8 x i32>) -+ -+define <16 x i16> @lasx_xvsrlrn_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrlrn_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlrn.h.w 
$xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64>, <4 x i64>) -+ -+define <8 x i32> @lasx_xvsrlrn_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrlrn_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlrn.w.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <8 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni.ll -new file mode 100644 -index 000000000000..56dbafe8b1ac ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrlrni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrlrni_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlrni.b.h $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrlrni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrlrni_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlrni.h.w $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrlrni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrlrni_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlrni.w.d $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrlrni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrlrni_d_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlrni.d.q $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssran.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssran.ll -new file mode 100644 -index 000000000000..da1857dad145 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssran.ll -@@ -0,0 +1,74 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16>, <16 x i16>) -+ -+define <32 x i8> @lasx_xvssran_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssran_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssran.b.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 
x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32>, <8 x i32>) -+ -+define <16 x i16> @lasx_xvssran_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssran_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssran.h.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64>, <4 x i64>) -+ -+define <8 x i32> @lasx_xvssran_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssran_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssran.w.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16>, <16 x i16>) -+ -+define <32 x i8> @lasx_xvssran_bu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssran_bu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssran.bu.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32>, <8 x i32>) -+ -+define <16 x i16> @lasx_xvssran_hu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssran_hu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssran.hu.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64>, <4 x i64>) -+ -+define <8 x i32> @lasx_xvssran_wu_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssran_wu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssran.wu.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <8 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani.ll -new file mode 100644 -index 000000000000..9efa659b4a1e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrani_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrani_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrani.b.h $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrani_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrani_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrani.h.w $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrani_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrani_w_d: -+; 
CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrani.w.d $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrani_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrani_d_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrani.d.q $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrani_bu_h(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrani_bu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrani.bu.h $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrani_hu_w(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrani_hu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrani.hu.w $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrani_wu_d(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrani_wu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrani.wu.d $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrani_du_q(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrani_du_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrani.du.q $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> %va, <4 x i64> %vb, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarn.ll -new file mode 100644 -index 000000000000..b5d59ff06f4d ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarn.ll -@@ -0,0 +1,74 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16>, <16 x i16>) -+ -+define <32 x i8> @lasx_xvssrarn_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrarn_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrarn.b.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32>, <8 x i32>) -+ -+define <16 x i16> @lasx_xvssrarn_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrarn_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrarn.h.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> 
@llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64>, <4 x i64>) -+ -+define <8 x i32> @lasx_xvssrarn_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrarn_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrarn.w.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16>, <16 x i16>) -+ -+define <32 x i8> @lasx_xvssrarn_bu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrarn_bu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrarn.bu.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32>, <8 x i32>) -+ -+define <16 x i16> @lasx_xvssrarn_hu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrarn_hu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrarn.hu.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64>, <4 x i64>) -+ -+define <8 x i32> @lasx_xvssrarn_wu_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrarn_wu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrarn.wu.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <8 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni.ll -new file mode 100644 -index 000000000000..da411dad645b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrarni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrarni_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrarni.b.h $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrarni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrarni_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrarni.h.w $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrarni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrarni_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrarni.w.d $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> 
@llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrarni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrarni_d_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrarni.d.q $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrarni_bu_h(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrarni_bu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrarni.bu.h $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrarni_hu_w(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrarni_hu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrarni.hu.w $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrarni_wu_d(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrarni_wu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrarni.wu.d $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrarni_du_q(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrarni_du_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrarni.du.q $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrln.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrln.ll -new file mode 100644 -index 000000000000..c60b5bdf81a0 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrln.ll -@@ -0,0 +1,74 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16>, <16 x i16>) -+ -+define <32 x i8> @lasx_xvssrln_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrln_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrln.b.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32>, <8 x i32>) -+ -+define <16 x i16> @lasx_xvssrln_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrln_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrln.h.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64>, <4 x i64>) -+ -+define <8 x i32> @lasx_xvssrln_w_d(<4 x i64> %va, <4 x i64> %vb) 
nounwind { -+; CHECK-LABEL: lasx_xvssrln_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrln.w.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16>, <16 x i16>) -+ -+define <32 x i8> @lasx_xvssrln_bu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrln_bu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrln.bu.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32>, <8 x i32>) -+ -+define <16 x i16> @lasx_xvssrln_hu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrln_hu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrln.hu.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64>, <4 x i64>) -+ -+define <8 x i32> @lasx_xvssrln_wu_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrln_wu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrln.wu.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <8 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni.ll -new file mode 100644 -index 000000000000..e57dd426bde8 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrlni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlni_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlni.b.h $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrlni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlni_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlni.h.w $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrlni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlni_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlni.w.d $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrlni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlni_d_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlni.d.q $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> 
@llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrlni_bu_h(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlni_bu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlni.bu.h $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrlni_hu_w(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlni_hu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlni.hu.w $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrlni_wu_d(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlni_wu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlni.wu.d $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrlni_du_q(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlni_du_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlni.du.q $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrn.ll -new file mode 100644 -index 000000000000..774cf1bd5e84 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrn.ll -@@ -0,0 +1,74 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16>, <16 x i16>) -+ -+define <32 x i8> @lasx_xvssrlrn_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlrn_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlrn.b.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32>, <8 x i32>) -+ -+define <16 x i16> @lasx_xvssrlrn_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlrn_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlrn.h.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64>, <4 x i64>) -+ -+define <8 x i32> @lasx_xvssrlrn_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlrn_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlrn.w.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <32 x i8> 
@llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16>, <16 x i16>) -+ -+define <32 x i8> @lasx_xvssrlrn_bu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlrn_bu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlrn.bu.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32>, <8 x i32>) -+ -+define <16 x i16> @lasx_xvssrlrn_hu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlrn_hu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlrn.hu.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64>, <4 x i64>) -+ -+define <8 x i32> @lasx_xvssrlrn_wu_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlrn_wu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlrn.wu.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <8 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni.ll -new file mode 100644 -index 000000000000..9a80516d8d78 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrlrni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlrni_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlrni.b.h $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrlrni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlrni_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlrni.h.w $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrlrni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlrni_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlrni.w.d $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrlrni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlrni_d_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlrni.d.q $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrlrni_bu_h(<32 x i8> %va, <32 x 
i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlrni_bu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlrni.bu.h $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrlrni_hu_w(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlrni_hu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlrni.hu.w $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrlrni_wu_d(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlrni_wu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlrni.wu.d $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrlrni_du_q(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlrni_du_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlrni.du.q $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssub.ll -new file mode 100644 -index 000000000000..cd3ccd9f5262 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssub.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvssub_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssub_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssub.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvssub_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssub_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssub.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvssub_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssub_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssub.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvssub_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssub_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssub.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 
x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvssub_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssub_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssub.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvssub_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssub_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssub.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvssub_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssub_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssub.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvssub_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssub_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssub.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st.ll -new file mode 100644 -index 000000000000..b69e7b813f0c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st.ll -@@ -0,0 +1,27 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare void @llvm.loongarch.lasx.xvst(<32 x i8>, i8*, i32) -+ -+define void @lasx_xvst(<32 x i8> %va, i8* %p) nounwind { -+; CHECK-LABEL: lasx_xvst: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvst $xr0, $a0, 1 -+; CHECK-NEXT: ret -+entry: -+ call void @llvm.loongarch.lasx.xvst(<32 x i8> %va, i8* %p, i32 1) -+ ret void -+} -+ -+declare void @llvm.loongarch.lasx.xvstx(<32 x i8>, i8*, i64) -+ -+define void @lasx_xvstx(<32 x i8> %va, i8* %p) nounwind { -+; CHECK-LABEL: lasx_xvstx: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: ori $a1, $zero, 1 -+; CHECK-NEXT: xvstx $xr0, $a0, $a1 -+; CHECK-NEXT: ret -+entry: -+ call void @llvm.loongarch.lasx.xvstx(<32 x i8> %va, i8* %p, i64 1) -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm.ll -new file mode 100644 -index 000000000000..52ef3c471412 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare void @llvm.loongarch.lasx.xvstelm.b(<32 x i8>, i8*, i32, i32) -+ -+define void @lasx_xvstelm_b(<32 x i8> %va, i8* %p) nounwind { -+; CHECK-LABEL: lasx_xvstelm_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvstelm.b $xr0, $a0, 1, 1 -+; CHECK-NEXT: ret -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 1, i32 1) 
-+ ret void -+} -+ -+declare void @llvm.loongarch.lasx.xvstelm.h(<16 x i16>, i8*, i32, i32) -+ -+define void @lasx_xvstelm_h(<16 x i16> %va, i8* %p) nounwind { -+; CHECK-LABEL: lasx_xvstelm_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvstelm.h $xr0, $a0, 2, 1 -+; CHECK-NEXT: ret -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 2, i32 1) -+ ret void -+} -+ -+declare void @llvm.loongarch.lasx.xvstelm.w(<8 x i32>, i8*, i32, i32) -+ -+define void @lasx_xvstelm_w(<8 x i32> %va, i8* %p) nounwind { -+; CHECK-LABEL: lasx_xvstelm_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvstelm.w $xr0, $a0, 4, 1 -+; CHECK-NEXT: ret -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 4, i32 1) -+ ret void -+} -+ -+declare void @llvm.loongarch.lasx.xvstelm.d(<4 x i64>, i8*, i32, i32) -+ -+define void @lasx_xvstelm_d(<4 x i64> %va, i8* %p) nounwind { -+; CHECK-LABEL: lasx_xvstelm_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvstelm.d $xr0, $a0, 8, 1 -+; CHECK-NEXT: ret -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 8, i32 1) -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sub.ll -new file mode 100644 -index 000000000000..4d69dd83dcde ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sub.ll -@@ -0,0 +1,62 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvsub_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsub_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsub.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvsub_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsub_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsub.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvsub_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsub_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsub.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvsub_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsub_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsub.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvsub_q(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsub_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsub.q $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git 
a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi.ll -new file mode 100644 -index 000000000000..cc3235ff4657 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsubi_bu(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvsubi_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubi.bu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsubi_hu(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvsubi_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubi.hu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsubi_wu(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvsubi_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubi.wu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsubi_du(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvsubi_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubi.du $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subw.ll -new file mode 100644 -index 000000000000..6f203e894990 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subw.ll -@@ -0,0 +1,194 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvsubwev_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsubwev_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubwev.h.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvsubwev_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsubwev_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubwev.w.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvsubwev_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsubwev_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubwev.d.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res 
-+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvsubwev_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsubwev_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubwev.q.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvsubwev_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsubwev_h_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubwev.h.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvsubwev_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsubwev_w_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubwev.w.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvsubwev_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsubwev_d_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubwev.d.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvsubwev_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsubwev_q_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubwev.q.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvsubwod_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsubwod_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubwod.h.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvsubwod_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsubwod_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubwod.w.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvsubwod_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsubwod_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubwod.d.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvsubwod_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: 
lasx_xvsubwod_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubwod.q.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvsubwod_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsubwod_h_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubwod.h.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvsubwod_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsubwod_w_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubwod.w.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvsubwod_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsubwod_d_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubwod.d.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvsubwod_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsubwod_q_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubwod.q.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xor.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xor.ll -new file mode 100644 -index 000000000000..6395b3d6f2e7 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xor.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvxor_v(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvxor_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori.ll -new file mode 100644 -index 000000000000..c71d7e731165 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvxori_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvxori_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvxori.b $xr0, $xr0, 3 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> %va, i32 3) -+ ret <32 x i8> %res -+} --- -2.20.1 - - -From 
45434adc9e68b15a6fc26f55659416ca2ef28ee3 Mon Sep 17 00:00:00 2001 -From: chenli -Date: Sat, 19 Aug 2023 17:14:12 +0800 -Subject: [PATCH 06/35] [LoongArch] Add testcases of LASX intrinsics with - immediates - -The testcases mainly cover three situations: -- the arguments which should be immediates are non immediates. -- the immediate is out of upper limit of the argument type. -- the immediate is out of lower limit of the argument type. - -Depends on D155830 - -Reviewed By: SixWeining - -Differential Revision: https://reviews.llvm.org/D157571 - -(cherry picked from commit 82bbf7003cabe2b6be8ab9b88bc96ecb8a64dc49) ---- - .../lasx/intrinsic-addi-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lasx/intrinsic-addi-non-imm.ll | 37 +++++ - .../lasx/intrinsic-andi-invalid-imm.ll | 17 +++ - .../LoongArch/lasx/intrinsic-andi-non-imm.ll | 10 ++ - .../lasx/intrinsic-bitclr-invalid-imm.ll | 65 +++++++++ - .../lasx/intrinsic-bitclr-non-imm.ll | 37 +++++ - .../lasx/intrinsic-bitrev-invalid-imm.ll | 65 +++++++++ - .../lasx/intrinsic-bitrev-non-imm.ll | 37 +++++ - .../lasx/intrinsic-bitseli-invalid-imm.ll | 17 +++ - .../lasx/intrinsic-bitseli-non-imm.ll | 10 ++ - .../lasx/intrinsic-bitset-invalid-imm.ll | 65 +++++++++ - .../lasx/intrinsic-bitset-non-imm.ll | 37 +++++ - .../lasx/intrinsic-bsll-invalid-imm.ll | 17 +++ - .../LoongArch/lasx/intrinsic-bsll-non-imm.ll | 10 ++ - .../lasx/intrinsic-bsrl-invalid-imm.ll | 17 +++ - .../LoongArch/lasx/intrinsic-bsrl-non-imm.ll | 10 ++ - .../lasx/intrinsic-extrins-invalid-imm.ll | 65 +++++++++ - .../lasx/intrinsic-extrins-non-imm.ll | 37 +++++ - .../lasx/intrinsic-frstp-invalid-imm.ll | 33 +++++ - .../LoongArch/lasx/intrinsic-frstp-non-imm.ll | 19 +++ - .../lasx/intrinsic-insgr2vr-invalid-imm.ll | 33 +++++ - .../lasx/intrinsic-insgr2vr-non-imm.ll | 19 +++ - .../lasx/intrinsic-insve0-invalid-imm.ll | 33 +++++ - .../lasx/intrinsic-insve0-non-imm.ll | 19 +++ - .../lasx/intrinsic-ld-invalid-imm.ll | 17 +++ - .../LoongArch/lasx/intrinsic-ld-non-imm.ll | 10 ++ - .../lasx/intrinsic-ldi-invalid-imm.ll | 81 +++++++++++ - .../LoongArch/lasx/intrinsic-ldi-non-imm.ll | 46 +++++++ - .../lasx/intrinsic-ldrepl-invalid-imm.ll | 65 +++++++++ - .../lasx/intrinsic-ldrepl-non-imm.ll | 37 +++++ - .../lasx/intrinsic-max-invalid-imm.ll | 129 ++++++++++++++++++ - .../LoongArch/lasx/intrinsic-max-non-imm.ll | 73 ++++++++++ - .../lasx/intrinsic-min-invalid-imm.ll | 129 ++++++++++++++++++ - .../LoongArch/lasx/intrinsic-min-non-imm.ll | 73 ++++++++++ - .../lasx/intrinsic-nori-invalid-imm.ll | 17 +++ - .../LoongArch/lasx/intrinsic-nori-non-imm.ll | 10 ++ - .../lasx/intrinsic-ori-invalid-imm.ll | 17 +++ - .../LoongArch/lasx/intrinsic-ori-non-imm.ll | 10 ++ - .../lasx/intrinsic-permi-invalid-imm.ll | 49 +++++++ - .../LoongArch/lasx/intrinsic-permi-non-imm.ll | 28 ++++ - .../lasx/intrinsic-pickve-invalid-imm.ll | 65 +++++++++ - .../lasx/intrinsic-pickve-non-imm.ll | 37 +++++ - .../lasx/intrinsic-pickve2gr-invalid-imm.ll | 65 +++++++++ - .../lasx/intrinsic-pickve2gr-non-imm.ll | 37 +++++ - .../lasx/intrinsic-repl128vei-invalid-imm.ll | 65 +++++++++ - .../lasx/intrinsic-repl128vei-non-imm.ll | 37 +++++ - .../lasx/intrinsic-rotr-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lasx/intrinsic-rotr-non-imm.ll | 37 +++++ - .../lasx/intrinsic-sat-invalid-imm.ll | 129 ++++++++++++++++++ - .../LoongArch/lasx/intrinsic-sat-non-imm.ll | 73 ++++++++++ - .../lasx/intrinsic-seq-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lasx/intrinsic-seq-non-imm.ll | 37 +++++ - .../lasx/intrinsic-shuf4i-invalid-imm.ll | 65 
+++++++++ - .../lasx/intrinsic-shuf4i-non-imm.ll | 37 +++++ - .../lasx/intrinsic-sle-invalid-imm.ll | 129 ++++++++++++++++++ - .../LoongArch/lasx/intrinsic-sle-non-imm.ll | 73 ++++++++++ - .../lasx/intrinsic-sll-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lasx/intrinsic-sll-non-imm.ll | 37 +++++ - .../lasx/intrinsic-sllwil-invalid-imm.ll | 97 +++++++++++++ - .../lasx/intrinsic-sllwil-non-imm.ll | 55 ++++++++ - .../lasx/intrinsic-slt-invalid-imm.ll | 129 ++++++++++++++++++ - .../LoongArch/lasx/intrinsic-slt-non-imm.ll | 73 ++++++++++ - .../lasx/intrinsic-sra-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lasx/intrinsic-sra-non-imm.ll | 37 +++++ - .../lasx/intrinsic-srani-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lasx/intrinsic-srani-non-imm.ll | 37 +++++ - .../lasx/intrinsic-srar-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lasx/intrinsic-srar-non-imm.ll | 37 +++++ - .../lasx/intrinsic-srarni-invalid-imm.ll | 65 +++++++++ - .../lasx/intrinsic-srarni-non-imm.ll | 37 +++++ - .../lasx/intrinsic-srl-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lasx/intrinsic-srl-non-imm.ll | 37 +++++ - .../lasx/intrinsic-srlni-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lasx/intrinsic-srlni-non-imm.ll | 37 +++++ - .../lasx/intrinsic-srlr-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lasx/intrinsic-srlr-non-imm.ll | 37 +++++ - .../lasx/intrinsic-srlrni-invalid-imm.ll | 65 +++++++++ - .../lasx/intrinsic-srlrni-non-imm.ll | 37 +++++ - .../lasx/intrinsic-ssrani-invalid-imm.ll | 129 ++++++++++++++++++ - .../lasx/intrinsic-ssrani-non-imm.ll | 73 ++++++++++ - .../lasx/intrinsic-ssrarni-invalid-imm.ll | 129 ++++++++++++++++++ - .../lasx/intrinsic-ssrarni-non-imm.ll | 73 ++++++++++ - .../lasx/intrinsic-ssrlni-invalid-imm.ll | 129 ++++++++++++++++++ - .../lasx/intrinsic-ssrlni-non-imm.ll | 73 ++++++++++ - .../lasx/intrinsic-ssrlrni-invalid-imm.ll | 129 ++++++++++++++++++ - .../lasx/intrinsic-ssrlrni-non-imm.ll | 73 ++++++++++ - .../lasx/intrinsic-st-invalid-imm.ll | 17 +++ - .../LoongArch/lasx/intrinsic-st-non-imm.ll | 10 ++ - .../lasx/intrinsic-stelm-invalid-imm.ll | 121 ++++++++++++++++ - .../LoongArch/lasx/intrinsic-stelm-non-imm.ll | 65 +++++++++ - .../lasx/intrinsic-subi-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lasx/intrinsic-subi-non-imm.ll | 37 +++++ - .../lasx/intrinsic-xori-invalid-imm.ll | 17 +++ - .../LoongArch/lasx/intrinsic-xori-non-imm.ll | 10 ++ - 94 files changed, 5003 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-invalid-imm.ll - create mode 100644 
llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-non-imm.ll - create mode 
100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-non-imm.ll - -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-invalid-imm.ll -new file mode 100644 -index 000000000000..4998847f0910 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> 
@llvm.loongarch.lasx.xvaddi.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvaddi_bu_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvaddi.bu: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvaddi_bu_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvaddi.bu: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> %va, i32 32) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvaddi_hu_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvaddi.hu: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvaddi_hu_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvaddi.hu: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> %va, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvaddi_wu_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvaddi.wu: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvaddi_wu_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvaddi.wu: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> %va, i32 32) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvaddi_du_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvaddi.du: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvaddi_du_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvaddi.du: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> %va, i32 32) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-non-imm.ll -new file mode 100644 -index 000000000000..f25f0e61a28e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvaddi_bu(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvaddi_hu(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvaddi_wu(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x 
i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvaddi_du(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-invalid-imm.ll -new file mode 100644 -index 000000000000..60f0b765f954 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvandi_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvandi.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvandi_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvandi.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> %va, i32 256) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-non-imm.ll -new file mode 100644 -index 000000000000..1273dc6b450b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvandi_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-invalid-imm.ll -new file mode 100644 -index 000000000000..ecc287e89bbc ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbitclri_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitclri.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvbitclri_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitclri.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> %va, i32 8) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvbitclri_h_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitclri.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvbitclri_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitclri.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> %va, i32 16) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32>, i32) -+ -+define <8 x i32> 
@lasx_xvbitclri_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitclri.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvbitclri_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitclri.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> %va, i32 32) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvbitclri_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitclri.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvbitclri_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitclri.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> %va, i32 64) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-non-imm.ll -new file mode 100644 -index 000000000000..09da85411082 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbitclri_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvbitclri_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvbitclri_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvbitclri_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-invalid-imm.ll -new file mode 100644 -index 000000000000..dff0884fdd5a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbitrevi_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitrevi.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvbitrevi_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitrevi.b: argument out of range -+entry: -+ 
%res = call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> %va, i32 8) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvbitrevi_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitrevi.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvbitrevi_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitrevi.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> %va, i32 16) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvbitrevi_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitrevi.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvbitrevi_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitrevi.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> %va, i32 32) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvbitrevi_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitrevi.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvbitrevi_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitrevi.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> %va, i32 64) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-non-imm.ll -new file mode 100644 -index 000000000000..e1aef1a82f0c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbitrevi_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvbitrevi_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvbitrevi_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvbitrevi_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git 
a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-invalid-imm.ll -new file mode 100644 -index 000000000000..3f6fd44f842c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbitseli_b_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitseli.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> %va, <32 x i8> %vb, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvbitseli_b_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitseli.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> %va, <32 x i8> %vb, i32 256) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-non-imm.ll -new file mode 100644 -index 000000000000..40533ab96d86 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbitseli_b(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> %va, <32 x i8> %vb, i32 %c) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-invalid-imm.ll -new file mode 100644 -index 000000000000..17a77ece7775 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbitseti_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitseti.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvbitseti_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitseti.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> %va, i32 8) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvbitseti_h_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitseti.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvbitseti_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitseti.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> %va, i32 16) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvbitseti_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitseti.w: argument out of range -+entry: -+ %res = call <8 x i32> 
@llvm.loongarch.lasx.xvbitseti.w(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvbitseti_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitseti.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> %va, i32 32) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvbitseti_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitseti.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvbitseti_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitseti.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> %va, i32 64) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-non-imm.ll -new file mode 100644 -index 000000000000..613285804e0e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbitseti_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvbitseti_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvbitseti_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvbitseti_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-invalid-imm.ll -new file mode 100644 -index 000000000000..1da08a633bd2 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbsll_v_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbsll.v: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvbsll_v_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbsll.v: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> %va, i32 32) -+ ret <32 x i8> %res -+} -diff --git 
a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-non-imm.ll -new file mode 100644 -index 000000000000..e19a3232c179 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbsll_v(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-invalid-imm.ll -new file mode 100644 -index 000000000000..5d2b63391e67 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbsrl_v_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbsrl.v: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvbsrl_v_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbsrl.v: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> %va, i32 32) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-non-imm.ll -new file mode 100644 -index 000000000000..8dfd0ca579b8 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbsrl_v(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-invalid-imm.ll -new file mode 100644 -index 000000000000..1301b8a146eb ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvextrins_b_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvextrins.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> %va, <32 x i8> %vb, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvextrins_b_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvextrins.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> %va, <32 x i8> %vb, i32 256) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvextrins_h_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvextrins.h: argument out of range -+entry: -+ 
%res = call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> %va, <16 x i16> %vb, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvextrins_h_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvextrins.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> %va, <16 x i16> %vb, i32 256) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvextrins_w_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvextrins.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> %va, <8 x i32> %vb, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvextrins_w_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvextrins.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> %va, <8 x i32> %vb, i32 256) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvextrins_d_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvextrins.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> %va, <4 x i64> %vb, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvextrins_d_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvextrins.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> %va, <4 x i64> %vb, i32 256) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-non-imm.ll -new file mode 100644 -index 000000000000..bca8f8b3c778 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvextrins_b(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> %va, <32 x i8> %vb, i32 %c) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvextrins_h(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> %va, <16 x i16> %vb, i32 %c) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvextrins_w(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> %va, <8 x i32> %vb, i32 %c) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvextrins_d(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> %va, <4 x i64> %vb, i32 %c) -+ ret <4 x i64> %res -+} -diff --git 
a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-invalid-imm.ll -new file mode 100644 -index 000000000000..64b4632669d2 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-invalid-imm.ll -@@ -0,0 +1,33 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvfrstpi_b_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvfrstpi.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> %va, <32 x i8> %vb, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvfrstpi_b_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvfrstpi.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> %va, <32 x i8> %vb, i32 32) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvfrstpi_h_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvfrstpi.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> %va, <16 x i16> %vb, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvfrstpi_h_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvfrstpi.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> %va, <16 x i16> %vb, i32 32) -+ ret <16 x i16> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-non-imm.ll -new file mode 100644 -index 000000000000..ca92cff9b2d1 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-non-imm.ll -@@ -0,0 +1,19 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvfrstpi_b(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> %va, <32 x i8> %vb, i32 %c) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvfrstpi_h(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> %va, <16 x i16> %vb, i32 %c) -+ ret <16 x i16> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-invalid-imm.ll -new file mode 100644 -index 000000000000..4982f2c7d43a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-invalid-imm.ll -@@ -0,0 +1,33 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32>, i32, i32) -+ -+define <8 x i32> @lasx_xvinsgr2vr_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvinsgr2vr.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> %va, i32 1, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvinsgr2vr_w_hi(<8 x i32> %va) nounwind { -+; 
CHECK: llvm.loongarch.lasx.xvinsgr2vr.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> %va, i32 1, i32 8) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64>, i64, i32) -+ -+define <4 x i64> @lasx_xvinsgr2vr_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvinsgr2vr.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> %va, i64 1, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvinsgr2vr_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvinsgr2vr.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> %va, i64 1, i32 4) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-non-imm.ll -new file mode 100644 -index 000000000000..3accabf6dbd9 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-non-imm.ll -@@ -0,0 +1,19 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32>, i32, i32) -+ -+define <8 x i32> @lasx_xvinsgr2vr_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> %va, i32 1, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64>, i64, i32) -+ -+define <4 x i64> @lasx_xvinsgr2vr_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> %va, i64 1, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-invalid-imm.ll -new file mode 100644 -index 000000000000..a54fa8515fba ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-invalid-imm.ll -@@ -0,0 +1,33 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvinsve0_w_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvinsve0.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> %va, <8 x i32> %vb, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvinsve0_w_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvinsve0.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> %va, <8 x i32> %vb, i32 8) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvinsve0_d_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvinsve0.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> %va, <4 x i64> %vb, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvinsve0_d_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvinsve0.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> %va, <4 x i64> %vb, i32 4) -+ ret <4 x i64> %res -+} -diff --git 
a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-non-imm.ll -new file mode 100644 -index 000000000000..53e59db11aa6 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-non-imm.ll -@@ -0,0 +1,19 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvinsve0_w(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> %va, <8 x i32> %vb, i32 %c) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvinsve0_d(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> %va, <4 x i64> %vb, i32 %c) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-invalid-imm.ll -new file mode 100644 -index 000000000000..20dd8a45d7f0 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvld(i8*, i32) -+ -+define <32 x i8> @lasx_xvld_lo(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvld: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvld(i8* %p, i32 -2049) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvld_hi(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvld: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvld(i8* %p, i32 2048) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-non-imm.ll -new file mode 100644 -index 000000000000..b23436a44832 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvld(i8*, i32) -+ -+define <32 x i8> @lasx_xvld(i8* %p, i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvld(i8* %p, i32 %a) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-invalid-imm.ll -new file mode 100644 -index 000000000000..f3dd3650cf8a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-invalid-imm.ll -@@ -0,0 +1,81 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvldi(i32) -+ -+define <4 x i64> @lasx_xvldi_lo() nounwind { -+; CHECK: llvm.loongarch.lasx.xvldi: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 -4097) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvldi_hi() nounwind { -+; CHECK: llvm.loongarch.lasx.xvldi: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 4096) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32) -+ -+define <32 x i8> @lasx_xvrepli_b_lo() nounwind { -+; CHECK: 
llvm.loongarch.lasx.xvrepli.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 -513) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvrepli_b_hi() nounwind { -+; CHECK: llvm.loongarch.lasx.xvrepli.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 512) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32) -+ -+define <16 x i16> @lasx_xvrepli_h_lo() nounwind { -+; CHECK: llvm.loongarch.lasx.xvrepli.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 -513) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvrepli_h_hi() nounwind { -+; CHECK: llvm.loongarch.lasx.xvrepli.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 512) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32) -+ -+define <8 x i32> @lasx_xvrepli_w_lo() nounwind { -+; CHECK: llvm.loongarch.lasx.xvrepli.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 -513) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvrepli_w_hi() nounwind { -+; CHECK: llvm.loongarch.lasx.xvrepli.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 512) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32) -+ -+define <4 x i64> @lasx_xvrepli_d_lo() nounwind { -+; CHECK: llvm.loongarch.lasx.xvrepli.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 -513) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvrepli_d_hi() nounwind { -+; CHECK: llvm.loongarch.lasx.xvrepli.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 512) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-non-imm.ll -new file mode 100644 -index 000000000000..6466818bf674 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-non-imm.ll -@@ -0,0 +1,46 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvldi(i32) -+ -+define <4 x i64> @lasx_xvldi(i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 %a) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32) -+ -+define <32 x i8> @lasx_xvrepli_b(i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 %a) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32) -+ -+define <16 x i16> @lasx_xvrepli_h(i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 %a) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32) -+ -+define <8 x i32> @lasx_xvrepli_w(i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 %a) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32) -+ -+define <4 x i64> @lasx_xvrepli_d(i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 %a) -+ ret <4 x 
i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-invalid-imm.ll -new file mode 100644 -index 000000000000..cb62a839985a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8*, i32) -+ -+define <32 x i8> @lasx_xvldrepl_b_lo(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvldrepl.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8* %p, i32 -2049) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvldrepl_b_hi(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvldrepl.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8* %p, i32 2048) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8*, i32) -+ -+define <16 x i16> @lasx_xvldrepl_h_lo(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvldrepl.h: argument out of range or not a multiple of 2. -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8* %p, i32 -2050) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvldrepl_h_hi(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvldrepl.h: argument out of range or not a multiple of 2. -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8* %p, i32 2048) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8*, i32) -+ -+define <8 x i32> @lasx_xvldrepl_w_lo(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvldrepl.w: argument out of range or not a multiple of 4. -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8* %p, i32 -2052) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvldrepl_w_hi(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvldrepl.w: argument out of range or not a multiple of 4. -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8* %p, i32 2048) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8*, i32) -+ -+define <4 x i64> @lasx_xvldrepl_d_lo(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvldrepl.d: argument out of range or not a multiple of 8. -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8* %p, i32 -2056) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvldrepl_d_hi(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvldrepl.d: argument out of range or not a multiple of 8. 
-+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8* %p, i32 2048) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-non-imm.ll -new file mode 100644 -index 000000000000..075d663b0dd7 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8*, i32) -+ -+define <32 x i8> @lasx_xvldrepl_b(i8* %p, i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8* %p, i32 %a) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8*, i32) -+ -+define <16 x i16> @lasx_xvldrepl_h(i8* %p, i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8* %p, i32 %a) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8*, i32) -+ -+define <8 x i32> @lasx_xvldrepl_w(i8* %p, i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8* %p, i32 %a) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8*, i32) -+ -+define <4 x i64> @lasx_xvldrepl_d(i8* %p, i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8* %p, i32 %a) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-invalid-imm.ll -new file mode 100644 -index 000000000000..a671e9979b2f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvmaxi_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmaxi.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> %va, i32 -17) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvmaxi_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmaxi.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> %va, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvmaxi_h_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmaxi.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> %va, i32 -17) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvmaxi_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmaxi.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> %va, i32 16) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvmaxi_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmaxi.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> %va, i32 -17) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvmaxi_w_hi(<8 x i32> %va) nounwind { -+; CHECK: 
llvm.loongarch.lasx.xvmaxi.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> %va, i32 16) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvmaxi_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmaxi.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> %va, i32 -17) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvmaxi_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmaxi.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> %va, i32 16) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvmaxi_bu_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmaxi.bu: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvmaxi_bu_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmaxi.bu: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> %va, i32 32) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvmaxi_hu_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmaxi.hu: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvmaxi_hu_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmaxi.hu: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> %va, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvmaxi_wu_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmaxi.wu: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvmaxi_wu_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmaxi.wu: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> %va, i32 32) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvmaxi_du_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmaxi.du: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvmaxi_du_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmaxi.du: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> %va, i32 32) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-non-imm.ll -new file mode 100644 -index 000000000000..b85798b53c92 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvmaxi_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call 
<32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvmaxi_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvmaxi_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvmaxi_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvmaxi_bu(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvmaxi_hu(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvmaxi_wu(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvmaxi_du(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-invalid-imm.ll -new file mode 100644 -index 000000000000..5ed4104c295f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvmini_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmini.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> %va, i32 -17) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvmini_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmini.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> %va, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvmini_h_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmini.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> %va, i32 -17) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> 
@lasx_xvmini_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmini.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> %va, i32 16) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvmini_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmini.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> %va, i32 -17) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvmini_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmini.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> %va, i32 16) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvmini_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmini.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> %va, i32 -17) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvmini_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmini.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> %va, i32 16) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvmini_bu_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmini.bu: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvmini_bu_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmini.bu: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> %va, i32 32) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvmini_hu_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmini.hu: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvmini_hu_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmini.hu: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> %va, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvmini_wu_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmini.wu: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvmini_wu_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmini.wu: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> %va, i32 32) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvmini_du_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmini.du: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvmini_du_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmini.du: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> %va, i32 
32) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-non-imm.ll -new file mode 100644 -index 000000000000..b81931977aad ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvmini_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvmini_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvmini_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvmini_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvmini_bu(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvmini_hu(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvmini_wu(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvmini_du(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-invalid-imm.ll -new file mode 100644 -index 000000000000..1130e094bf1f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvnori_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvnori.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> %va, i32 -1) -+ ret <32 x 
i8> %res -+} -+ -+define <32 x i8> @lasx_xvnori_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvnori.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> %va, i32 256) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-non-imm.ll -new file mode 100644 -index 000000000000..8f2333064d64 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvnori_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-invalid-imm.ll -new file mode 100644 -index 000000000000..90dec8e55f2d ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvori_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvori.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvori_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvori.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> %va, i32 256) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-non-imm.ll -new file mode 100644 -index 000000000000..ae6571d98f4a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvori_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-invalid-imm.ll -new file mode 100644 -index 000000000000..41f4856bd8f7 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-invalid-imm.ll -@@ -0,0 +1,49 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvpermi_w_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpermi.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> %va, <8 x i32> %vb, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvpermi_w_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpermi.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> %va, <8 x i32> %vb, i32 256) -+ ret <8 x i32> %res -+} -+ -+declare <4 x 
i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvpermi_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpermi.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvpermi_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpermi.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> %va, i32 256) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvpermi_q_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpermi.q: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> %va, <32 x i8> %vb, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvpermi_q_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpermi.q: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> %va, <32 x i8> %vb, i32 256) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-non-imm.ll -new file mode 100644 -index 000000000000..afb335c5d6ca ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-non-imm.ll -@@ -0,0 +1,28 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvpermi_w(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> %va, <8 x i32> %vb, i32 %c) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvpermi_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvpermi_q(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> %va, <32 x i8> %vb, i32 %c) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-invalid-imm.ll -new file mode 100644 -index 000000000000..cfc6ec42874e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvpickve_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpickve.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvpickve_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpickve.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> %va, i32 8) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> 
@llvm.loongarch.lasx.xvpickve.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvpickve_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpickve.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvpickve_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpickve.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> %va, i32 4) -+ ret <4 x i64> %res -+} -+ -+declare <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float>, i32) -+ -+define <8 x float> @lasx_xvpickve_w_f_lo(<8 x float> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpickve.w.f: argument out of range -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> %va, i32 -1) -+ ret <8 x float> %res -+} -+ -+define <8 x float> @lasx_xvpickve_w_f_hi(<8 x float> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpickve.w.f: argument out of range -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> %va, i32 8) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double>, i32) -+ -+define <4 x double> @lasx_xvpickve_d_f_lo(<4 x double> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpickve.d.f: argument out of range -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> %va, i32 -1) -+ ret <4 x double> %res -+} -+ -+define <4 x double> @lasx_xvpickve_d_f_hi(<4 x double> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpickve.d.f: argument out of range -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> %va, i32 4) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-non-imm.ll -new file mode 100644 -index 000000000000..be1f19a89737 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvpickve_w(<8 x i32> %va, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> %va, i32 %c) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvpickve_d(<4 x i64> %va, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> %va, i32 %c) -+ ret <4 x i64> %res -+} -+ -+declare <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float>, i32) -+ -+define <8 x float> @lasx_xvpickve_w_f(<8 x float> %va, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> %va, i32 %c) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double>, i32) -+ -+define <4 x double> @lasx_xvpickve_d_f(<4 x double> %va, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> %va, i32 %c) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-invalid-imm.ll 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-invalid-imm.ll -new file mode 100644 -index 000000000000..93056b272dfc ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32>, i32) -+ -+define i32 @lasx_xvpickve2gr_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpickve2gr.w: argument out of range -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> %va, i32 -1) -+ ret i32 %res -+} -+ -+define i32 @lasx_xvpickve2gr_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpickve2gr.w: argument out of range -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> %va, i32 8) -+ ret i32 %res -+} -+ -+declare i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64>, i32) -+ -+define i64 @lasx_xvpickve2gr_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpickve2gr.d: argument out of range -+entry: -+ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> %va, i32 -1) -+ ret i64 %res -+} -+ -+define i64 @lasx_xvpickve2gr_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpickve2gr.d: argument out of range -+entry: -+ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> %va, i32 4) -+ ret i64 %res -+} -+ -+declare i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32>, i32) -+ -+define i32 @lasx_xvpickve2gr_wu_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpickve2gr.wu: argument out of range -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> %va, i32 -1) -+ ret i32 %res -+} -+ -+define i32 @lasx_xvpickve2gr_wu_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpickve2gr.wu: argument out of range -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> %va, i32 8) -+ ret i32 %res -+} -+ -+declare i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64>, i32) -+ -+define i64 @lasx_xvpickve2gr_du_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpickve2gr.du: argument out of range -+entry: -+ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> %va, i32 -1) -+ ret i64 %res -+} -+ -+define i64 @lasx_xvpickve2gr_du_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpickve2gr.du: argument out of range -+entry: -+ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> %va, i32 4) -+ ret i64 %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-non-imm.ll -new file mode 100644 -index 000000000000..0fa8c94adc60 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32>, i32) -+ -+define i32 @lasx_xvpickve2gr_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> %va, i32 %b) -+ ret i32 %res -+} -+ -+declare i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64>, i32) -+ -+define i64 @lasx_xvpickve2gr_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> %va, i32 %b) -+ ret i64 %res -+} -+ -+declare i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32>, i32) 
-+ -+define i32 @lasx_xvpickve2gr_wu(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> %va, i32 %b) -+ ret i32 %res -+} -+ -+declare i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64>, i32) -+ -+define i64 @lasx_xvpickve2gr_du(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> %va, i32 %b) -+ ret i64 %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-invalid-imm.ll -new file mode 100644 -index 000000000000..a0cb309c54e1 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvrepl128vei_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvrepl128vei.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvrepl128vei_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvrepl128vei.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> %va, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvrepl128vei_h_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvrepl128vei.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvrepl128vei_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvrepl128vei.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> %va, i32 8) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvrepl128vei_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvrepl128vei.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvrepl128vei_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvrepl128vei.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> %va, i32 4) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvrepl128vei_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvrepl128vei.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvrepl128vei_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvrepl128vei.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> %va, i32 2) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-non-imm.ll -new file mode 100644 -index 000000000000..c537ffa66ba7 ---- /dev/null -+++ 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvrepl128vei_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvrepl128vei_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvrepl128vei_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvrepl128vei_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-invalid-imm.ll -new file mode 100644 -index 000000000000..40abdf497605 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvrotri_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvrotri.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvrotri_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvrotri.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> %va, i32 8) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvrotri_h_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvrotri.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvrotri_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvrotri.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> %va, i32 16) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvrotri_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvrotri.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvrotri_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvrotri.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> %va, i32 32) -+ ret <8 x i32> %res -+} -+ -+declare 
<4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvrotri_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvrotri.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvrotri_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvrotri.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> %va, i32 64) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-non-imm.ll -new file mode 100644 -index 000000000000..dd38301d0534 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvrotri_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvrotri_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvrotri_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvrotri_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-invalid-imm.ll -new file mode 100644 -index 000000000000..839fbc9990d3 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsat_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsat.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvsat_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsat.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> %va, i32 8) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsat_h_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsat.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvsat_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsat.h: argument out of range -+entry: -+ %res = call <16 
x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> %va, i32 16) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsat_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsat.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvsat_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsat.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> %va, i32 32) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsat_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsat.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvsat_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsat.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> %va, i32 64) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsat_bu_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsat.bu: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvsat_bu_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsat.bu: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> %va, i32 8) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsat_hu_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsat.hu: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvsat_hu_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsat.hu: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> %va, i32 16) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsat_wu_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsat.wu: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvsat_wu_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsat.wu: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> %va, i32 32) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsat_du_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsat.du: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvsat_du_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsat.du: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> %va, i32 64) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-non-imm.ll -new file mode 
100644 -index 000000000000..b73b32ebd3b0 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsat_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsat_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsat_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsat_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsat_bu(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsat_hu(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsat_wu(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsat_du(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-invalid-imm.ll -new file mode 100644 -index 000000000000..bb6ef0cc6574 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvseqi_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvseqi.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> %va, i32 -17) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvseqi_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvseqi.b: argument out of range -+entry: -+ %res = call <32 x i8> 
@llvm.loongarch.lasx.xvseqi.b(<32 x i8> %va, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvseqi_h_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvseqi.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> %va, i32 -17) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvseqi_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvseqi.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> %va, i32 16) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvseqi_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvseqi.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> %va, i32 -17) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvseqi_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvseqi.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> %va, i32 16) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvseqi_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvseqi.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> %va, i32 -17) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvseqi_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvseqi.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> %va, i32 16) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-non-imm.ll -new file mode 100644 -index 000000000000..fb2c6206da7b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvseqi_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvseqi_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvseqi_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvseqi_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-invalid-imm.ll -new file mode 100644 -index 000000000000..9217d1f6a05d 
---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvshuf4i_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvshuf4i.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvshuf4i_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvshuf4i.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> %va, i32 256) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvshuf4i_h_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvshuf4i.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvshuf4i_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvshuf4i.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> %va, i32 256) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvshuf4i_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvshuf4i.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvshuf4i_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvshuf4i.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> %va, i32 256) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvshuf4i_d_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvshuf4i.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> %va, <4 x i64> %vb, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvshuf4i_d_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvshuf4i.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> %va, <4 x i64> %vb, i32 256) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-non-imm.ll -new file mode 100644 -index 000000000000..8d6d1c694193 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvshuf4i_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvshuf4i_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ 
-+declare <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvshuf4i_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvshuf4i_d(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> %va, <4 x i64> %vb, i32 %c) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-invalid-imm.ll -new file mode 100644 -index 000000000000..5b10aca9801d ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvslei_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslei.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> %va, i32 -17) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvslei_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslei.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> %va, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvslei_h_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslei.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> %va, i32 -17) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvslei_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslei.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> %va, i32 16) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvslei_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslei.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> %va, i32 -17) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvslei_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslei.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> %va, i32 16) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvslei_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslei.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> %va, i32 -17) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvslei_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslei.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> %va, i32 16) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvslei_bu_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslei.bu: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> %va, i32 
-1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvslei_bu_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslei.bu: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> %va, i32 32) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvslei_hu_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslei.hu: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvslei_hu_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslei.hu: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> %va, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvslei_wu_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslei.wu: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvslei_wu_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslei.wu: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> %va, i32 32) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvslei_du_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslei.du: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvslei_du_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslei.du: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> %va, i32 32) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-non-imm.ll -new file mode 100644 -index 000000000000..903bc10d88b7 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvslei_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvslei_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvslei_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvslei_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> %va, i32 %b) -+ ret <4 x 
i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvslei_bu(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvslei_hu(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvslei_wu(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvslei_du(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-invalid-imm.ll -new file mode 100644 -index 000000000000..bf8205376a6c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvslli_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslli.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvslli_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslli.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> %va, i32 8) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvslli_h_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslli.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvslli_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslli.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> %va, i32 16) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvslli_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslli.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvslli_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslli.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> %va, i32 32) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvslli_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslli.d: argument out of range -+entry: -+ %res = call <4 x i64> 
@llvm.loongarch.lasx.xvslli.d(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvslli_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslli.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> %va, i32 64) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-non-imm.ll -new file mode 100644 -index 000000000000..b5368a86b5c3 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvslli_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvslli_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvslli_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvslli_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-invalid-imm.ll -new file mode 100644 -index 000000000000..18803767d6c0 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-invalid-imm.ll -@@ -0,0 +1,97 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8>, i32) -+ -+define <16 x i16> @lasx_xvsllwil_h_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsllwil.h.b: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvsllwil_h_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsllwil.h.b: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> %va, i32 8) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16>, i32) -+ -+define <8 x i32> @lasx_xvsllwil_w_h_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsllwil.w.h: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvsllwil_w_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsllwil.w.h: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> %va, i32 16) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32>, i32) -+ 
-+define <4 x i64> @lasx_xvsllwil_d_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsllwil.d.w: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvsllwil_d_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsllwil.d.w: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> %va, i32 32) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8>, i32) -+ -+define <16 x i16> @lasx_xvsllwil_hu_bu_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsllwil.hu.bu: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvsllwil_hu_bu_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsllwil.hu.bu: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> %va, i32 8) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16>, i32) -+ -+define <8 x i32> @lasx_xvsllwil_wu_hu_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsllwil.wu.hu: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvsllwil_wu_hu_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsllwil.wu.hu: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> %va, i32 16) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32>, i32) -+ -+define <4 x i64> @lasx_xvsllwil_du_wu_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsllwil.du.wu: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvsllwil_du_wu_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsllwil.du.wu: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> %va, i32 32) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-non-imm.ll -new file mode 100644 -index 000000000000..3f5d4d631671 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-non-imm.ll -@@ -0,0 +1,55 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8>, i32) -+ -+define <16 x i16> @lasx_xvsllwil_h_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16>, i32) -+ -+define <8 x i32> @lasx_xvsllwil_w_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32>, i32) -+ -+define <4 x i64> @lasx_xvsllwil_d_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> 
@llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> %va, i32 %b) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8>, i32) -+ -+define <16 x i16> @lasx_xvsllwil_hu_bu(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16>, i32) -+ -+define <8 x i32> @lasx_xvsllwil_wu_hu(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32>, i32) -+ -+define <4 x i64> @lasx_xvsllwil_du_wu(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-invalid-imm.ll -new file mode 100644 -index 000000000000..dc0567da4e47 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvslti_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslti.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> %va, i32 -17) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvslti_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslti.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> %va, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvslti_h_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslti.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> %va, i32 -17) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvslti_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslti.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> %va, i32 16) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvslti_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslti.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> %va, i32 -17) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvslti_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslti.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> %va, i32 16) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvslti_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslti.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> %va, i32 -17) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvslti_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslti.d: argument out of 
range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> %va, i32 16) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvslti_bu_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslti.bu: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvslti_bu_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslti.bu: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> %va, i32 32) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvslti_hu_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslti.hu: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvslti_hu_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslti.hu: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> %va, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvslti_wu_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslti.wu: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvslti_wu_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslti.wu: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> %va, i32 32) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvslti_du_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslti.du: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvslti_du_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslti.du: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> %va, i32 32) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-non-imm.ll -new file mode 100644 -index 000000000000..a2cedc8d3ef3 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvslti_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvslti_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvslti_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter 
-+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvslti_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvslti_bu(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvslti_hu(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvslti_wu(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvslti_du(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-invalid-imm.ll -new file mode 100644 -index 000000000000..15b522d5e7e3 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrai_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrai.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvsrai_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrai.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> %va, i32 8) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrai_h_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrai.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvsrai_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrai.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> %va, i32 16) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrai_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrai.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvsrai_w_hi(<8 x i32> %va) nounwind { -+; CHECK: 
llvm.loongarch.lasx.xvsrai.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> %va, i32 32) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrai_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrai.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvsrai_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrai.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> %va, i32 64) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-non-imm.ll -new file mode 100644 -index 000000000000..fefee7246ae6 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrai_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrai_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrai_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrai_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-invalid-imm.ll -new file mode 100644 -index 000000000000..bedbfc4889d2 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrani_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrani.b.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvsrani_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrani.b.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrani_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: 
llvm.loongarch.lasx.xvsrani.h.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvsrani_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrani.h.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrani_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrani.w.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvsrani_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrani.w.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrani_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrani.d.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvsrani_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrani.d.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-non-imm.ll -new file mode 100644 -index 000000000000..3c17f2b6090a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrani_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrani_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrani_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrani_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) -+ 
ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-invalid-imm.ll -new file mode 100644 -index 000000000000..e417e3cc5bbf ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrari_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrari.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvsrari_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrari.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> %va, i32 8) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrari_h_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrari.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvsrari_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrari.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> %va, i32 16) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrari_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrari.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvsrari_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrari.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> %va, i32 32) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrari_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrari.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvsrari_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrari.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> %va, i32 64) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-non-imm.ll -new file mode 100644 -index 000000000000..15fed7966f1c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrari_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrari_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call 
<16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrari_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrari_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-invalid-imm.ll -new file mode 100644 -index 000000000000..83e977827e2d ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrarni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrarni.b.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvsrarni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrarni.b.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrarni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrarni.h.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvsrarni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrarni.h.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrarni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrarni.w.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvsrarni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrarni.w.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrarni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrarni.d.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvsrarni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: 
llvm.loongarch.lasx.xvsrarni.d.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-non-imm.ll -new file mode 100644 -index 000000000000..eb577a29fb33 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrarni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrarni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrarni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrarni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-invalid-imm.ll -new file mode 100644 -index 000000000000..3ab02dcb97ed ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrli_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrli.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvsrli_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrli.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> %va, i32 8) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrli_h_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrli.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvsrli_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrli.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> %va, i32 16) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> 
@llvm.loongarch.lasx.xvsrli.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrli_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrli.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvsrli_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrli.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> %va, i32 32) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrli_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrli.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvsrli_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrli.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> %va, i32 64) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-non-imm.ll -new file mode 100644 -index 000000000000..bc085aeaa232 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrli_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrli_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrli_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrli_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-invalid-imm.ll -new file mode 100644 -index 000000000000..9e7c94305630 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrlni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlni.b.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvsrlni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: 
llvm.loongarch.lasx.xvsrlni.b.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrlni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlni.h.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvsrlni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlni.h.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrlni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlni.w.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvsrlni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlni.w.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrlni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlni.d.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvsrlni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlni.d.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-non-imm.ll -new file mode 100644 -index 000000000000..66d800470003 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrlni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrlni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrlni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> %va, <8 x i32> 
%vb, i32 %c) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrlni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-invalid-imm.ll -new file mode 100644 -index 000000000000..52621ddc6f49 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrlri_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlri.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvsrlri_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlri.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> %va, i32 8) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrlri_h_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlri.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvsrlri_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlri.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> %va, i32 16) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrlri_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlri.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvsrlri_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlri.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> %va, i32 32) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrlri_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlri.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvsrlri_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlri.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> %va, i32 64) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-non-imm.ll -new file mode 100644 -index 000000000000..5663e3475b12 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrlri_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg 
operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrlri_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrlri_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrlri_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-invalid-imm.ll -new file mode 100644 -index 000000000000..2d65a75b175a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrlrni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlrni.b.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvsrlrni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlrni.b.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrlrni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlrni.h.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvsrlrni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlrni.h.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrlrni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlrni.w.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvsrlrni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlrni.w.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> 
@lasx_xvsrlrni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlrni.d.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvsrlrni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlrni.d.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-non-imm.ll -new file mode 100644 -index 000000000000..82da0d21d013 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrlrni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrlrni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrlrni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrlrni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-invalid-imm.ll -new file mode 100644 -index 000000000000..e10d5d7bd488 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrani_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrani.b.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvssrani_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrani.b.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrani_h_w_lo(<16 x i16> 
%va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrani.h.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvssrani_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrani.h.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrani_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrani.w.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvssrani_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrani.w.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrani_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrani.d.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvssrani_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrani.d.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrani_bu_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrani.bu.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvssrani_bu_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrani.bu.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrani_hu_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrani.hu.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvssrani_hu_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrani.hu.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrani_wu_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrani.wu.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x 
i32> @lasx_xvssrani_wu_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrani.wu.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 64) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrani_du_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrani.du.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvssrani_du_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrani.du.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> %va, <4 x i64> %vb, i32 128) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-non-imm.ll -new file mode 100644 -index 000000000000..a928cc2de8c8 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrani_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrani_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrani_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrani_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrani_bu_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrani_hu_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x 
i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrani_wu_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrani_du_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-invalid-imm.ll -new file mode 100644 -index 000000000000..42cd6ac99754 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrarni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrarni.b.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvssrarni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrarni.b.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrarni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrarni.h.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvssrarni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrarni.h.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrarni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrarni.w.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvssrarni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrarni.w.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrarni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrarni.d.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvssrarni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrarni.d.q: 
argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrarni_bu_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrarni.bu.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvssrarni_bu_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrarni.bu.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrarni_hu_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrarni.hu.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvssrarni_hu_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrarni.hu.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrarni_wu_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrarni.wu.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvssrarni_wu_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrarni.wu.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 64) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrarni_du_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrarni.du.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvssrarni_du_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrarni.du.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 128) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-non-imm.ll -new file mode 100644 -index 000000000000..f050e7d79b0f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrarni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) -+ 
ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrarni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrarni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrarni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrarni_bu_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrarni_hu_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrarni_wu_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrarni_du_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-invalid-imm.ll -new file mode 100644 -index 000000000000..26be21a83aa4 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrlni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlni.b.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvssrlni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlni.b.h: argument out of range -+entry: -+ %res = call <32 x i8> 
@llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrlni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlni.h.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvssrlni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlni.h.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrlni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlni.w.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvssrlni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlni.w.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrlni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlni.d.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvssrlni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlni.d.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrlni_bu_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlni.bu.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvssrlni_bu_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlni.bu.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrlni_hu_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlni.hu.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvssrlni_hu_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlni.hu.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrlni_wu_d_lo(<8 x i32> %va, <8 
x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlni.wu.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvssrlni_wu_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlni.wu.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 64) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrlni_du_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlni.du.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvssrlni_du_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlni.du.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 128) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-non-imm.ll -new file mode 100644 -index 000000000000..72da2a746dd5 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrlni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrlni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrlni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrlni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrlni_bu_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrlni_hu_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { -+; CHECK: 
immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrlni_wu_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrlni_du_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-invalid-imm.ll -new file mode 100644 -index 000000000000..cd778e2c0627 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrlrni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlrni.b.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvssrlrni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlrni.b.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrlrni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlrni.h.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvssrlrni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlrni.h.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrlrni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlrni.w.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvssrlrni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlrni.w.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrlrni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlrni.d.q: argument out of range -+entry: -+ %res = call <4 
x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvssrlrni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlrni.d.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrlrni_bu_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlrni.bu.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvssrlrni_bu_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlrni.bu.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrlrni_hu_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlrni.hu.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvssrlrni_hu_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlrni.hu.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrlrni_wu_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlrni.wu.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvssrlrni_wu_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlrni.wu.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 64) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrlrni_du_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlrni.du.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvssrlrni_du_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlrni.du.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 128) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-non-imm.ll -new file mode 100644 -index 000000000000..a10c54329149 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x 
i8> @lasx_xvssrlrni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrlrni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrlrni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrlrni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrlrni_bu_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrlrni_hu_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrlrni_wu_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrlrni_du_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-invalid-imm.ll -new file mode 100644 -index 000000000000..0177f2b77b93 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare void @llvm.loongarch.lasx.xvst(<32 x i8>, i8*, i32) -+ -+define void @lasx_xvst_lo(<32 x i8> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvst: argument out of range -+entry: -+ call void @llvm.loongarch.lasx.xvst(<32 x i8> %va, i8* %p, i32 -2049) -+ ret void -+} -+ -+define void @lasx_xvst_hi(<32 x i8> %va, i8* %p) 
nounwind { -+; CHECK: llvm.loongarch.lasx.xvst: argument out of range -+entry: -+ call void @llvm.loongarch.lasx.xvst(<32 x i8> %va, i8* %p, i32 2048) -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-non-imm.ll -new file mode 100644 -index 000000000000..c19207aad6b8 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare void @llvm.loongarch.lasx.xvst(<32 x i8>, i8*, i32) -+ -+define void @lasx_xvst(<32 x i8> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lasx.xvst(<32 x i8> %va, i8* %p, i32 %b) -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-invalid-imm.ll -new file mode 100644 -index 000000000000..0ea2484e090d ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-invalid-imm.ll -@@ -0,0 +1,121 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare void @llvm.loongarch.lasx.xvstelm.b(<32 x i8>, i8*, i32, i32) -+ -+define void @lasx_xvstelm_b_lo(<32 x i8> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvstelm.b: argument out of range -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 -129, i32 1) -+ ret void -+} -+ -+define void @lasx_xvstelm_b_hi(<32 x i8> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvstelm.b: argument out of range -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 128, i32 1) -+ ret void -+} -+ -+define void @lasx_xvstelm_b_idx_lo(<32 x i8> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvstelm.b: argument out of range -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 1, i32 -1) -+ ret void -+} -+ -+define void @lasx_xvstelm_b_idx_hi(<32 x i8> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvstelm.b: argument out of range -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 1, i32 32) -+ ret void -+} -+ -+declare void @llvm.loongarch.lasx.xvstelm.h(<16 x i16>, i8*, i32, i32) -+ -+define void @lasx_xvstelm_h_lo(<16 x i16> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvstelm.h: argument out of range or not a multiple of 2. -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 -258, i32 1) -+ ret void -+} -+ -+define void @lasx_xvstelm_h_hi(<16 x i16> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvstelm.h: argument out of range or not a multiple of 2. -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 256, i32 1) -+ ret void -+} -+ -+define void @lasx_xvstelm_h_idx_lo(<16 x i16> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvstelm.h: argument out of range or not a multiple of 2. -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 2, i32 -1) -+ ret void -+} -+ -+define void @lasx_xvstelm_h_idx_hi(<16 x i16> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvstelm.h: argument out of range or not a multiple of 2. 
-+entry: -+ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 2, i32 16) -+ ret void -+} -+ -+declare void @llvm.loongarch.lasx.xvstelm.w(<8 x i32>, i8*, i32, i32) -+ -+define void @lasx_xvstelm_w_lo(<8 x i32> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvstelm.w: argument out of range or not a multiple of 4. -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 -516, i32 1) -+ ret void -+} -+ -+define void @lasx_xvstelm_w_hi(<8 x i32> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvstelm.w: argument out of range or not a multiple of 4. -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 512, i32 1) -+ ret void -+} -+ -+define void @lasx_xvstelm_w_idx_lo(<8 x i32> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvstelm.w: argument out of range or not a multiple of 4. -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 4, i32 -1) -+ ret void -+} -+ -+define void @lasx_xvstelm_w_idx_hi(<8 x i32> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvstelm.w: argument out of range or not a multiple of 4. -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 4, i32 8) -+ ret void -+} -+ -+declare void @llvm.loongarch.lasx.xvstelm.d(<4 x i64>, i8*, i32, i32) -+ -+define void @lasx_xvstelm_d_lo(<4 x i64> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvstelm.d: argument out of range or not a multiple of 8. -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 -1032, i32 1) -+ ret void -+} -+ -+define void @lasx_xvstelm_d_hi(<4 x i64> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvstelm.d: argument out of range or not a multiple of 8. -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 1024, i32 1) -+ ret void -+} -+ -+define void @lasx_xvstelm_d_idx_lo(<4 x i64> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvstelm.d: argument out of range or not a multiple of 8. -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 8, i32 -1) -+ ret void -+} -+ -+define void @lasx_xvstelm_d_idx_hi(<4 x i64> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvstelm.d: argument out of range or not a multiple of 8. 
-+entry: -+ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 8, i32 4) -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-non-imm.ll -new file mode 100644 -index 000000000000..42c7c0da1746 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-non-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare void @llvm.loongarch.lasx.xvstelm.b(<32 x i8>, i8*, i32, i32) -+ -+define void @lasx_xvstelm_b(<32 x i8> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 %b, i32 1) -+ ret void -+} -+ -+define void @lasx_xvstelm_b_idx(<32 x i8> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 1, i32 %b) -+ ret void -+} -+ -+declare void @llvm.loongarch.lasx.xvstelm.h(<16 x i16>, i8*, i32, i32) -+ -+define void @lasx_xvstelm_h(<16 x i16> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 %b, i32 1) -+ ret void -+} -+ -+define void @lasx_xvstelm_h_idx(<16 x i16> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 2, i32 %b) -+ ret void -+} -+ -+declare void @llvm.loongarch.lasx.xvstelm.w(<8 x i32>, i8*, i32, i32) -+ -+define void @lasx_xvstelm_w(<8 x i32> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 %b, i32 1) -+ ret void -+} -+ -+define void @lasx_xvstelm_w_idx(<8 x i32> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 4, i32 %b) -+ ret void -+} -+ -+declare void @llvm.loongarch.lasx.xvstelm.d(<4 x i64>, i8*, i32, i32) -+ -+define void @lasx_xvstelm_d(<4 x i64> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 %b, i32 1) -+ ret void -+} -+ -+define void @lasx_xvstelm_d_idx(<4 x i64> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 8, i32 %b) -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-invalid-imm.ll -new file mode 100644 -index 000000000000..810008c17f7e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsubi_bu_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsubi.bu: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvsubi_bu_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsubi.bu: argument out of range -+entry: -+ %res = call <32 x i8> 
@llvm.loongarch.lasx.xvsubi.bu(<32 x i8> %va, i32 32) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsubi_hu_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsubi.hu: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvsubi_hu_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsubi.hu: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> %va, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsubi_wu_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsubi.wu: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvsubi_wu_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsubi.wu: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> %va, i32 32) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsubi_du_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsubi.du: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvsubi_du_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsubi.du: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> %va, i32 32) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-non-imm.ll -new file mode 100644 -index 000000000000..924b89ce9d6c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsubi_bu(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsubi_hu(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsubi_wu(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsubi_du(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-invalid-imm.ll -new file mode 100644 
-index 000000000000..0170d204cf42 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvxori_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvxori.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvxori_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvxori.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> %va, i32 256) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-non-imm.ll -new file mode 100644 -index 000000000000..1478f691a1cc ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvxori_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} --- -2.20.1 - - -From 7f172768f1132b99d4bacf4daf119a9154428b52 Mon Sep 17 00:00:00 2001 -From: chenli -Date: Sat, 19 Aug 2023 17:15:19 +0800 -Subject: [PATCH 07/35] [LoongArch][MC] Add invalid immediate testcases for LSX - instructions - -Reviewed By: SixWeining - -Differential Revision: https://reviews.llvm.org/D157573 - -(cherry picked from commit 2f4b6695836e16ec075061cd2508444bd403ad7d) ---- - llvm/test/MC/LoongArch/lsx/invalid-imm.s | 1149 +++++++++++++++++++++- - 1 file changed, 1143 insertions(+), 6 deletions(-) - -diff --git a/llvm/test/MC/LoongArch/lsx/invalid-imm.s b/llvm/test/MC/LoongArch/lsx/invalid-imm.s -index fb7e24c83488..c3f9aaa08281 100644 ---- a/llvm/test/MC/LoongArch/lsx/invalid-imm.s -+++ b/llvm/test/MC/LoongArch/lsx/invalid-imm.s -@@ -3,53 +3,1190 @@ - # RUN: not llvm-mc --triple=loongarch64 %s 2>&1 | FileCheck %s - - ## uimm1 -+vstelm.d $vr0, $a0, 8, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1] -+ -+vstelm.d $vr0, $a0, 8, 2 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1] -+ -+vreplvei.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1] -+ - vreplvei.d $vr0, $vr1, 2 - # CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1] - -+vpickve2gr.du $a0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 1] -+ -+vpickve2gr.du $a0, $vr1, 2 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 1] -+ -+vpickve2gr.d $a0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 1] -+ -+vpickve2gr.d $a0, $vr1, 2 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 1] -+ -+vinsgr2vr.d $vr0, $a0, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1] -+ -+vinsgr2vr.d $vr0, $a0, 2 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1] -+ -+## uimm2 -+vstelm.w $vr0, $a0, 4, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must 
be an integer in the range [0, 3] -+ -+vstelm.w $vr0, $a0, 4, 4 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] -+ -+vreplvei.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] -+ -+vreplvei.w $vr0, $vr1, 4 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] -+ -+vpickve2gr.wu $a0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 3] -+ -+vpickve2gr.wu $a0, $vr1, 4 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 3] -+ -+vpickve2gr.w $a0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3] -+ -+vpickve2gr.w $a0, $vr1, 4 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3] -+ -+vinsgr2vr.w $vr0, $a0, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] -+ -+vinsgr2vr.w $vr0, $a0, 4 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] -+ -+## uimm3 -+vstelm.h $vr0, $a0, 2, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] -+ -+vstelm.h $vr0, $a0, 2, 8 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] -+ -+vreplvei.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] -+ -+vreplvei.h $vr0, $vr1, 8 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] -+ -+vpickve2gr.hu $a0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7] -+ -+vpickve2gr.hu $a0, $vr1, 8 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7] -+ -+vpickve2gr.h $a0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] -+ -+vpickve2gr.h $a0, $vr1, 8 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] -+ -+vinsgr2vr.h $vr0, $a0, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] -+ -+vinsgr2vr.h $vr0, $a0, 8 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] -+ -+vbitrevi.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] -+ -+vbitrevi.b $vr0, $vr1, 8 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] -+ -+vbitseti.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] -+ -+vbitseti.b $vr0, $vr1, 8 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] -+ -+vbitclri.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] -+ -+vbitclri.b $vr0, $vr1, 8 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] -+ -+vsrari.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] -+ -+vsrari.b $vr0, $vr1, 8 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] -+ -+vsrlri.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] -+ -+vsrlri.b $vr0, $vr1, 8 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] -+ -+vsllwil.hu.bu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 7] -+ -+vsllwil.hu.bu $vr0, $vr1, 8 -+# 
CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 7] -+ -+vsllwil.h.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] -+ -+vsllwil.h.b $vr0, $vr1, 8 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] -+ -+vrotri.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] -+ -+vrotri.b $vr0, $vr1, 8 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] -+ -+vsrai.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] -+ -+vsrai.b $vr0, $vr1, 8 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] -+ -+vsrli.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] -+ -+vsrli.b $vr0, $vr1, 8 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] -+ -+vslli.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] -+ -+vslli.b $vr0, $vr1, 8 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] -+ -+vsat.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 7] -+ -+vsat.b $vr0, $vr1, 8 -+# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 7] -+ -+vsat.bu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] -+ -+vsat.bu $vr0, $vr1, 8 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] -+ - ## uimm4 -+vstelm.b $vr0, $a0, 1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] -+ -+vstelm.b $vr0, $a0, 1, 16 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] -+ -+vreplvei.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] -+ -+vreplvei.b $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] -+ -+vpickve2gr.bu $a0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+vpickve2gr.bu $a0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+vpickve2gr.b $a0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+vpickve2gr.b $a0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+vinsgr2vr.b $vr0, $a0, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] -+ -+vinsgr2vr.b $vr0, $a0, 16 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] -+ -+vbitrevi.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] -+ -+vbitrevi.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] -+ -+vbitseti.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] -+ -+vbitseti.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] -+ -+vbitclri.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] -+ -+vbitclri.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] 
-+ -+vssrarni.bu.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] -+ -+vssrarni.bu.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] -+ -+vssrlrni.bu.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] -+ -+vssrlrni.bu.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] -+ -+vssrarni.b.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+vssrarni.b.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+vssrlrni.b.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+vssrlrni.b.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+vssrani.bu.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+vssrani.bu.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+vssrlni.bu.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+vssrlni.bu.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+vssrani.b.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+vssrani.b.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+vssrlni.b.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+vssrlni.b.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+vsrarni.b.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+vsrarni.b.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+vsrlrni.b.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+vsrlrni.b.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+vsrani.b.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] -+ -+vsrani.b.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] -+ -+vsrlni.b.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] -+ -+vsrlni.b.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] -+ -+vsrari.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] -+ -+vsrari.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] -+ -+vsrlri.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] -+ -+vsrlri.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] -+ -+vsllwil.wu.hu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] -+ -+vsllwil.wu.hu $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] -+ 
-+vsllwil.w.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+vsllwil.w.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+vrotri.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] -+ -+vrotri.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] -+ -+vsrai.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] -+ -+vsrai.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] -+ -+vsrli.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] -+ -+vsrli.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] -+ -+vslli.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] -+ -+vslli.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] -+ -+vsat.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 15] -+ - vsat.h $vr0, $vr1, 16 - # CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 15] - -+vsat.hu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] -+ -+vsat.hu $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] -+ -+## uimm5 -+vbsrl.v $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] -+ -+vbsrl.v $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] -+ -+vbsll.v $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] -+ -+vbsll.v $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] -+ -+vslti.du $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vslti.du $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vslti.wu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vslti.wu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vslti.hu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vslti.hu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vslti.bu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vslti.bu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vslei.du $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vslei.du $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vslei.wu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vslei.wu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vslei.hu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range 
[0, 31] -+ -+vslei.hu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vslei.bu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vslei.bu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vfrstpi.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+vfrstpi.h $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+vfrstpi.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+vfrstpi.b $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+vbitrevi.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] -+ -+vbitrevi.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] -+ -+vbitseti.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] -+ -+vbitseti.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] -+ -+vbitclri.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] -+ -+vbitclri.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] -+ -+vssrarni.hu.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] -+ -+vssrarni.hu.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] -+ -+vssrlrni.hu.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] -+ -+vssrlrni.hu.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] -+ -+vssrarni.h.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+vssrarni.h.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+vssrlrni.h.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+vssrlrni.h.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+vssrani.hu.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+vssrani.hu.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+vssrlni.hu.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+vssrlni.hu.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+vssrani.h.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+vssrani.h.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+vssrlni.h.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+vssrlni.h.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+vsrarni.h.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ 
-+vsrarni.h.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+vsrlrni.h.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+vsrlrni.h.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+vsrani.h.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] -+ -+vsrani.h.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] -+ -+vsrlni.h.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] -+ -+vsrlni.h.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] -+ -+vsrari.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vsrari.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vsrlri.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vsrlri.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vsllwil.du.wu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] -+ -+vsllwil.du.wu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] -+ -+vsllwil.d.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+vsllwil.d.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+vrotri.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vrotri.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vsrai.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] -+ -+vsrai.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] -+ -+vsrli.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] -+ -+vsrli.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] -+ -+vslli.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] -+ -+vslli.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] -+ -+vaddi.bu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vaddi.bu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vaddi.hu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vaddi.hu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vaddi.wu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vaddi.wu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vaddi.du $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vaddi.du $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must 
be an integer in the range [0, 31] -+ -+vsubi.bu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vsubi.bu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vsubi.hu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vsubi.hu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vsubi.wu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vsubi.wu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vsubi.du $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vsubi.du $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vmaxi.bu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vmaxi.bu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vmaxi.hu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vmaxi.hu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vmaxi.wu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vmaxi.wu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vmaxi.du $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vmaxi.du $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vmini.bu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vmini.bu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vmini.hu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vmini.hu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vmini.wu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vmini.wu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vmini.du $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vmini.du $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vsat.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 31] -+ -+vsat.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 31] -+ -+vsat.wu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] -+ -+vsat.wu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] -+ - ## simm5 -+vslti.d $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vslti.d $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vslti.w $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: 
error: immediate must be an integer in the range [-16, 15] -+ -+vslti.w $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vslti.h $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vslti.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vslti.b $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vslti.b $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vslei.d $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vslei.d $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vslei.w $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vslei.w $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vslei.h $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vslei.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vslei.b $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vslei.b $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vseqi.d $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vseqi.d $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vseqi.w $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vseqi.w $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vseqi.h $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vseqi.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vseqi.b $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ - vseqi.b $vr0, $vr1, 16 - # CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] - -+vmaxi.b $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vmaxi.b $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vmaxi.h $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vmaxi.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vmaxi.w $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vmaxi.w $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vmaxi.d $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vmaxi.d $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vmini.b $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] 
-+ -+vmini.b $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vmini.h $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vmini.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vmini.w $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vmini.w $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vmini.d $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vmini.d $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+## uimm6 -+vbitrevi.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] -+ -+vbitrevi.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] -+ -+vbitseti.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] -+ -+vbitseti.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] -+ -+vbitclri.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] -+ -+vbitclri.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] -+ -+vssrarni.wu.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] -+ -+vssrarni.wu.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] -+ -+vssrlrni.wu.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] -+ -+vssrlrni.wu.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] -+ -+vssrarni.w.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] -+ -+vssrarni.w.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] -+ -+vssrlrni.w.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] -+ -+vssrlrni.w.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] -+ -+vssrani.wu.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] -+ -+vssrani.wu.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] -+ -+vssrlni.wu.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] -+ -+vssrlni.wu.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] -+ -+vssrani.w.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+vssrani.w.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+vssrlni.w.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+vssrlni.w.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+vsrarni.w.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ 
-+vsrarni.w.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+vsrlrni.w.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+vsrlrni.w.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+vsrani.w.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] -+ -+vsrani.w.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] -+ -+vsrlni.w.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] -+ -+vsrlni.w.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] -+ -+vsrari.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] -+ -+vsrari.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] -+ -+vsrlri.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] -+ -+vsrlri.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] -+ -+vrotri.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] -+ -+vrotri.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] -+ -+vsrai.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] -+ -+vsrai.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] -+ -+vsrli.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] -+ -+vsrli.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] -+ -+vslli.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] -+ -+vslli.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] -+ -+vsat.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 63] -+ -+vsat.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 63] -+ -+vsat.du $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] -+ -+vsat.du $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] -+ - ## uimm7 -+vssrarni.du.q $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] -+ -+vssrarni.du.q $vr0, $vr1, 128 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] -+ -+vssrlrni.du.q $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] -+ -+vssrlrni.du.q $vr0, $vr1, 128 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] -+ -+vssrarni.d.q $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] -+ -+vssrarni.d.q $vr0, $vr1, 128 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] -+ -+vssrlrni.d.q $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] -+ -+vssrlrni.d.q $vr0, $vr1, 128 -+# CHECK: 
:[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] -+ -+vssrani.du.q $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] -+ -+vssrani.du.q $vr0, $vr1, 128 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] -+ -+vssrlni.du.q $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] -+ -+vssrlni.du.q $vr0, $vr1, 128 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] -+ -+vssrani.d.q $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] -+ -+vssrani.d.q $vr0, $vr1, 128 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] -+ -+vssrlni.d.q $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] -+ -+vssrlni.d.q $vr0, $vr1, 128 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] -+ -+vsrarni.d.q $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] -+ -+vsrarni.d.q $vr0, $vr1, 128 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] -+ -+vsrlrni.d.q $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] -+ -+vsrlrni.d.q $vr0, $vr1, 128 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] -+ -+vsrani.d.q $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 127] -+ -+vsrani.d.q $vr0, $vr1, 128 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 127] -+ -+vsrlni.d.q $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 127] -+ - vsrlni.d.q $vr0, $vr1, 128 - # CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 127] - --## simm8 -+## uimm8 -+vextrins.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+vextrins.d $vr0, $vr1, 256 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+vextrins.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+vextrins.w $vr0, $vr1, 256 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+vextrins.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+vextrins.h $vr0, $vr1, 256 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+vextrins.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+vextrins.b $vr0, $vr1, 256 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+vpermi.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255] -+ - vpermi.w $vr0, $vr1, 256 - # CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255] - -+vshuf4i.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] -+ -+vshuf4i.d $vr0, $vr1, 256 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] -+ -+vshuf4i.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] -+ 
-+vshuf4i.w $vr0, $vr1, 256 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] -+ -+vshuf4i.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] -+ -+vshuf4i.h $vr0, $vr1, 256 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] -+ -+vshuf4i.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] -+ -+vshuf4i.b $vr0, $vr1, 256 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] -+ -+vbitseli.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+vbitseli.b $vr0, $vr1, 256 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+vandi.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] -+ -+vandi.b $vr0, $vr1, 256 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] -+ -+vori.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 255] -+ -+vori.b $vr0, $vr1, 256 -+# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 255] -+ -+vxori.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] -+ -+vxori.b $vr0, $vr1, 256 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] -+ -+vnori.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] -+ -+vnori.b $vr0, $vr1, 256 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] -+ -+## simm8 -+vstelm.b $vr0, $a0, -129, 1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-128, 127] -+ -+vstelm.b $vr0, $a0, 128, 1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-128, 127] -+ - ## simm8_lsl1 --vstelm.h $vr0, $a0, 255, 1 -+vstelm.h $vr0, $a0, -258, 1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 2 in the range [-256, 254] -+ -+vstelm.h $vr0, $a0, 256, 1 - # CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 2 in the range [-256, 254] - - ## simm8_lsl2 --vstelm.w $vr0, $a0, 512, 1 -+vstelm.w $vr0, $a0, -516, 1 - # CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 4 in the range [-512, 508] - --## simm10 --vrepli.b $vr0, 512 --# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] -+vstelm.w $vr0, $a0, 512, 1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 4 in the range [-512, 508] - - ## simm8_lsl3 -+vstelm.d $vr0, $a0, -1032, 1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 8 in the range [-1024, 1016] -+ - vstelm.d $vr0, $a0, 1024, 1 - # CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 8 in the range [-1024, 1016] - - ## simm9_lsl3 -+vldrepl.d $vr0, $a0, -2056 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 8 in the range [-2048, 2040] -+ - vldrepl.d $vr0, $a0, 2048 - # CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 8 in the range [-2048, 2040] - - ## simm10_lsl2 -+vldrepl.w $vr0, $a0, -2052 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 4 in the range [-2048, 2044] -+ - vldrepl.w $vr0, $a0, 2048 - # CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 4 in the range [-2048, 2044] - -+## 
simm10 -+vrepli.b $vr0, -513 -+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] -+ -+vrepli.b $vr0, 512 -+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] -+ -+vrepli.h $vr0, -513 -+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] -+ -+vrepli.h $vr0, 512 -+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] -+ -+vrepli.w $vr0, -513 -+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] -+ -+vrepli.w $vr0, 512 -+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] -+ -+vrepli.d $vr0, -513 -+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] -+ -+vrepli.d $vr0, 512 -+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] -+ - ## simm11_lsl1 -+vldrepl.h $vr0, $a0, -2050 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 2 in the range [-2048, 2046] -+ - vldrepl.h $vr0, $a0, 2048 - # CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 2 in the range [-2048, 2046] - -+## simm12 -+vldrepl.b $vr0, $a0, -2049 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-2048, 2047] -+ -+vldrepl.b $vr0, $a0, 2048 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-2048, 2047] -+ -+vst $vr0, $a0, -2049 -+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] -+ -+vst $vr0, $a0, 2048 -+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] -+ -+vld $vr0, $a0, -2049 -+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] -+ -+vld $vr0, $a0, 2048 -+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] -+ - ## simm13 -+vldi $vr0, -4097 -+# CHECK: :[[#@LINE-1]]:12: error: immediate must be an integer in the range [-4096, 4095] -+ - vldi $vr0, 4096 - # CHECK: :[[#@LINE-1]]:12: error: immediate must be an integer in the range [-4096, 4095] --- -2.20.1 - - -From aca10c260dfde07f2248a70e3d37770ee75e8e7a Mon Sep 17 00:00:00 2001 -From: chenli -Date: Sat, 19 Aug 2023 17:16:09 +0800 -Subject: [PATCH 08/35] [LoongArch][MC] Add invalid immediate testcases for - LASX instructions - -Reviewed By: SixWeining - -Differential Revision: https://reviews.llvm.org/D157574 - -(cherry picked from commit d163ae8c255f663707d4b0d5de03fcb18274b3eb) ---- - llvm/test/MC/LoongArch/lasx/invalid-imm.s | 1149 ++++++++++++++++++++- - 1 file changed, 1143 insertions(+), 6 deletions(-) - -diff --git a/llvm/test/MC/LoongArch/lasx/invalid-imm.s b/llvm/test/MC/LoongArch/lasx/invalid-imm.s -index 5c61a7a42009..6f64a6f87802 100644 ---- a/llvm/test/MC/LoongArch/lasx/invalid-imm.s -+++ b/llvm/test/MC/LoongArch/lasx/invalid-imm.s -@@ -3,53 +3,1190 @@ - # RUN: not llvm-mc --triple=loongarch64 %s 2>&1 | FileCheck %s - - ## uimm1 -+xvrepl128vei.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 1] -+ - xvrepl128vei.d $xr0, $xr1, 2 - # CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 1] - -+## uimm2 -+xvpickve.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] -+ -+xvpickve.d $xr0, $xr1, 4 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] -+ -+xvinsve0.d $xr0, $xr1, -1 -+# CHECK: 
:[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] -+ -+xvinsve0.d $xr0, $xr1, 4 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] -+ -+xvinsgr2vr.d $xr0, $a0, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3] -+ -+xvinsgr2vr.d $xr0, $a0, 4 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3] -+ -+xvpickve2gr.d $a0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 3] -+ -+xvpickve2gr.d $a0, $xr1, 4 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 3] -+ -+xvpickve2gr.du $a0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 3] -+ -+xvpickve2gr.du $a0, $xr1, 4 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 3] -+ -+xvstelm.d $xr0, $a0, 8, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3] -+ -+xvstelm.d $xr0, $a0, 8, 4 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3] -+ -+xvrepl128vei.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 3] -+ -+xvrepl128vei.w $xr0, $xr1, 4 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 3] -+ -+## uimm3 -+xvpickve.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] -+ -+xvpickve.w $xr0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] -+ -+xvinsve0.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] -+ -+xvinsve0.w $xr0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] -+ -+xvinsgr2vr.w $xr0, $a0, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] -+ -+xvinsgr2vr.w $xr0, $a0, 8 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] -+ -+xvpickve2gr.wu $a0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 7] -+ -+xvpickve2gr.wu $a0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 7] -+ -+xvpickve2gr.w $a0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7] -+ -+xvpickve2gr.w $a0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7] -+ -+xvstelm.w $xr0, $a0, 4, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] -+ -+xvstelm.w $xr0, $a0, 4, 8 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] -+ -+xvrepl128vei.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 7] -+ -+xvrepl128vei.h $xr0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 7] -+ -+xvbitrevi.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] -+ -+xvbitrevi.b $xr0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] -+ -+xvbitseti.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] -+ -+xvbitseti.b $xr0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] -+ -+xvbitclri.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: 
immediate must be an integer in the range [0, 7] -+ -+xvbitclri.b $xr0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] -+ -+xvsrari.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7] -+ -+xvsrari.b $xr0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7] -+ -+xvsrlri.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7] -+ -+xvsrlri.b $xr0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7] -+ -+xvsllwil.hu.bu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 7] -+ -+xvsllwil.hu.bu $xr0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 7] -+ -+xvsllwil.h.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7] -+ -+xvsllwil.h.b $xr0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7] -+ -+xvrotri.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7] -+ -+xvrotri.b $xr0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7] -+ -+xvsrai.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] -+ -+xvsrai.b $xr0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] -+ -+xvsrli.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] -+ -+xvsrli.b $xr0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] -+ -+xvslli.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] -+ -+xvslli.b $xr0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] -+ -+xvsat.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] -+ -+xvsat.b $xr0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] -+ -+xvsat.bu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] -+ -+xvsat.bu $xr0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] -+ - ## uimm4 -+xvstelm.h $xr0, $a0, 2, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+xvstelm.h $xr0, $a0, 2, 16 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+xvrepl128vei.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15] -+ -+xvrepl128vei.b $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15] -+ -+xvbitrevi.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+xvbitrevi.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+xvbitseti.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+xvbitseti.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+xvbitclri.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+xvbitclri.h 
$xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+xvssrarni.bu.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15] -+ -+xvssrarni.bu.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15] -+ -+xvssrlrni.bu.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15] -+ -+xvssrlrni.bu.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15] -+ -+xvssrarni.b.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] -+ -+xvssrarni.b.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] -+ -+xvssrlrni.b.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] -+ -+xvssrlrni.b.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] -+ -+xvssrani.bu.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] -+ -+xvssrani.bu.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] -+ -+xvssrlni.bu.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] -+ -+xvssrlni.bu.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] -+ -+xvssrani.b.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+xvssrani.b.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+xvssrlni.b.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+xvssrlni.b.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+xvsrarni.b.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+xvsrarni.b.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+xvsrlrni.b.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+xvsrlrni.b.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+xvsrani.b.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+xvsrani.b.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+xvsrlni.b.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+xvsrlni.b.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+xvsrari.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15] -+ -+xvsrari.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15] -+ -+xvsrlri.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15] -+ -+xvsrlri.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15] -+ -+xvsllwil.wu.hu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15] 
-+ -+xvsllwil.wu.hu $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15] -+ -+xvsllwil.w.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+xvsllwil.w.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+xvrotri.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15] -+ -+xvrotri.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15] -+ -+xvsrai.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] -+ -+xvsrai.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] -+ -+xvsrli.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] -+ -+xvsrli.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] -+ -+xvslli.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] -+ -+xvslli.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] -+ -+xvsat.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] -+ - xvsat.h $xr0, $xr1, 16 - # CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] - -+xvsat.hu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] -+ -+xvsat.hu $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] -+ -+## uimm5 -+xvstelm.b $xr0, $a0, 1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+xvstelm.b $xr0, $a0, 1, 32 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+xvbsrl.v $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+xvbsrl.v $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+xvbsll.v $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+xvbsll.v $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+xvslti.du $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvslti.du $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvslti.wu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvslti.wu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvslti.hu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvslti.hu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvslti.bu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvslti.bu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvslei.du $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvslei.du $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: 
immediate must be an integer in the range [0, 31] -+ -+xvslei.wu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvslei.wu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvslei.hu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvslei.hu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvslei.bu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvslei.bu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvfrstpi.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] -+ -+xvfrstpi.h $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] -+ -+xvfrstpi.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] -+ -+xvfrstpi.b $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] -+ -+xvbitrevi.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+xvbitrevi.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+xvbitseti.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+xvbitseti.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+xvbitclri.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+xvbitclri.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+xvssrarni.hu.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31] -+ -+xvssrarni.hu.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31] -+ -+xvssrlrni.hu.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31] -+ -+xvssrlrni.hu.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31] -+ -+xvssrarni.h.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] -+ -+xvssrarni.h.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] -+ -+xvssrlrni.h.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] -+ -+xvssrlrni.h.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] -+ -+xvssrani.hu.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] -+ -+xvssrani.hu.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] -+ -+xvssrlni.hu.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] -+ -+xvssrlni.hu.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] -+ -+xvssrani.h.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+xvssrani.h.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:26: error: 
immediate must be an integer in the range [0, 31] -+ -+xvssrlni.h.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+xvssrlni.h.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+xvsrarni.h.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+xvsrarni.h.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+xvsrlrni.h.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+xvsrlrni.h.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+xvsrani.h.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+xvsrani.h.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+xvsrlni.h.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+xvsrlni.h.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+xvsrari.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvsrari.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvsrlri.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvsrlri.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvsllwil.du.wu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31] -+ -+xvsllwil.du.wu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31] -+ -+xvsllwil.d.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+xvsllwil.d.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+xvrotri.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvrotri.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvsrai.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+xvsrai.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+xvsrli.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+xvsrli.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+xvslli.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+xvslli.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+xvaddi.bu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvaddi.bu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvaddi.hu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvaddi.hu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the 
range [0, 31] -+ -+xvaddi.wu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvaddi.wu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvaddi.du $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvaddi.du $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvsubi.bu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvsubi.bu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvsubi.hu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvsubi.hu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvsubi.wu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvsubi.wu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvsubi.du $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvsubi.du $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvmaxi.bu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvmaxi.bu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvmaxi.hu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvmaxi.hu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvmaxi.wu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvmaxi.wu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvmaxi.du $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvmaxi.du $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvmini.bu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvmini.bu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvmini.hu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvmini.hu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvmini.wu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvmini.wu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvmini.du $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvmini.du $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvsat.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] -+ -+xvsat.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] -+ -+xvsat.wu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: 
immediate must be an integer in the range [0, 31] -+ -+xvsat.wu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ - ## simm5 -+xvslti.d $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvslti.d $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvslti.w $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvslti.w $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvslti.h $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvslti.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvslti.b $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvslti.b $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvslei.d $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvslei.d $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvslei.w $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvslei.w $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvslei.h $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvslei.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvslei.b $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvslei.b $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvseqi.d $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvseqi.d $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvseqi.w $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvseqi.w $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvseqi.h $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvseqi.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvseqi.b $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ - xvseqi.b $xr0, $xr1, 16 - # CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] - -+xvmaxi.b $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvmaxi.b $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvmaxi.h $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvmaxi.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvmaxi.w $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an 
integer in the range [-16, 15] -+ -+xvmaxi.w $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvmaxi.d $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvmaxi.d $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvmini.b $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvmini.b $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvmini.h $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvmini.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvmini.w $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvmini.w $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvmini.d $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvmini.d $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+## uimm6 -+xvbitrevi.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+xvbitrevi.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+xvbitseti.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+xvbitseti.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+xvbitclri.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+xvbitclri.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+xvssrarni.wu.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 63] -+ -+xvssrarni.wu.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 63] -+ -+xvssrlrni.wu.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 63] -+ -+xvssrlrni.wu.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 63] -+ -+xvssrarni.w.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] -+ -+xvssrarni.w.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] -+ -+xvssrlrni.w.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] -+ -+xvssrlrni.w.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] -+ -+xvssrani.wu.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] -+ -+xvssrani.wu.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] -+ -+xvssrlni.wu.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] -+ -+xvssrlni.wu.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] -+ -+xvssrani.w.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: 
immediate must be an integer in the range [0, 63] -+ -+xvssrani.w.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] -+ -+xvssrlni.w.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] -+ -+xvssrlni.w.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] -+ -+xvsrarni.w.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] -+ -+xvsrarni.w.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] -+ -+xvsrlrni.w.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] -+ -+xvsrlrni.w.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] -+ -+xvsrani.w.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+xvsrani.w.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+xvsrlni.w.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+xvsrlni.w.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+xvsrari.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63] -+ -+xvsrari.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63] -+ -+xvsrlri.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63] -+ -+xvsrlri.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63] -+ -+xvrotri.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63] -+ -+xvrotri.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63] -+ -+xvsrai.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] -+ -+xvsrai.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] -+ -+xvsrli.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] -+ -+xvsrli.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] -+ -+xvslli.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] -+ -+xvslli.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] -+ -+xvsat.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] -+ -+xvsat.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] -+ -+xvsat.du $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] -+ -+xvsat.du $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] -+ - ## uimm7 -+xvssrarni.du.q $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 127] -+ -+xvssrarni.du.q $xr0, $xr1, 128 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 127] -+ -+xvssrlrni.du.q $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer 
in the range [0, 127] -+ -+xvssrlrni.du.q $xr0, $xr1, 128 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 127] -+ -+xvssrarni.d.q $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] -+ -+xvssrarni.d.q $xr0, $xr1, 128 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] -+ -+xvssrlrni.d.q $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] -+ -+xvssrlrni.d.q $xr0, $xr1, 128 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] -+ -+xvssrani.du.q $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] -+ -+xvssrani.du.q $xr0, $xr1, 128 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] -+ -+xvssrlni.du.q $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] -+ -+xvssrlni.du.q $xr0, $xr1, 128 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] -+ -+xvssrani.d.q $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] -+ -+xvssrani.d.q $xr0, $xr1, 128 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] -+ -+xvssrlni.d.q $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] -+ -+xvssrlni.d.q $xr0, $xr1, 128 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] -+ -+xvsrarni.d.q $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] -+ -+xvsrarni.d.q $xr0, $xr1, 128 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] -+ -+xvsrlrni.d.q $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] -+ -+xvsrlrni.d.q $xr0, $xr1, 128 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] -+ -+xvsrani.d.q $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] -+ -+xvsrani.d.q $xr0, $xr1, 128 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] -+ -+xvsrlni.d.q $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] -+ - xvsrlni.d.q $xr0, $xr1, 128 - # CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] - --## simm8 -+## uimm8 -+xvextrins.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] -+ -+xvextrins.d $xr0, $xr1, 256 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] -+ -+xvextrins.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] -+ -+xvextrins.w $xr0, $xr1, 256 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] -+ -+xvextrins.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] -+ -+xvextrins.h $xr0, $xr1, 256 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] -+ -+xvextrins.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] -+ -+xvextrins.b $xr0, $xr1, 256 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] -+ -+xvpermi.q 
$xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] -+ -+xvpermi.q $xr0, $xr1, 256 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] -+ -+xvpermi.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] -+ -+xvpermi.d $xr0, $xr1, 256 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] -+ -+xvpermi.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] -+ - xvpermi.w $xr0, $xr1, 256 - # CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] - -+xvshuf4i.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+xvshuf4i.d $xr0, $xr1, 256 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+xvshuf4i.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+xvshuf4i.w $xr0, $xr1, 256 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+xvshuf4i.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+xvshuf4i.h $xr0, $xr1, 256 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+xvshuf4i.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+xvshuf4i.b $xr0, $xr1, 256 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+xvbitseli.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] -+ -+xvbitseli.b $xr0, $xr1, 256 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] -+ -+xvandi.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255] -+ -+xvandi.b $xr0, $xr1, 256 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255] -+ -+xvori.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] -+ -+xvori.b $xr0, $xr1, 256 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] -+ -+xvxori.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255] -+ -+xvxori.b $xr0, $xr1, 256 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255] -+ -+xvnori.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255] -+ -+xvnori.b $xr0, $xr1, 256 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255] -+ -+## simm8 -+xvstelm.b $xr0, $a0, -129, 1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-128, 127] -+ -+xvstelm.b $xr0, $a0, 128, 1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-128, 127] -+ - ## simm8_lsl1 --xvstelm.h $xr0, $a0, 255, 1 -+xvstelm.h $xr0, $a0, -258, 1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 2 in the range [-256, 254] -+ -+xvstelm.h $xr0, $a0, 256, 1 - # CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 2 in the range [-256, 254] - - ## simm8_lsl2 --xvstelm.w $xr0, $a0, 512, 1 -+xvstelm.w $xr0, $a0, -516, 1 - # CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 4 in the range [-512, 508] - --## simm10 
--xvrepli.b $xr0, 512 --# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] -+xvstelm.w $xr0, $a0, 512, 1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 4 in the range [-512, 508] - - ## simm8_lsl3 -+xvstelm.d $xr0, $a0, -1032, 1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 8 in the range [-1024, 1016] -+ - xvstelm.d $xr0, $a0, 1024, 1 - # CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 8 in the range [-1024, 1016] - - ## simm9_lsl3 -+xvldrepl.d $xr0, $a0, -2056 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 8 in the range [-2048, 2040] -+ - xvldrepl.d $xr0, $a0, 2048 - # CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 8 in the range [-2048, 2040] - - ## simm10_lsl2 -+xvldrepl.w $xr0, $a0, -2052 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 4 in the range [-2048, 2044] -+ - xvldrepl.w $xr0, $a0, 2048 - # CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 4 in the range [-2048, 2044] - -+## simm10 -+xvrepli.b $xr0, -513 -+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] -+ -+xvrepli.b $xr0, 512 -+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] -+ -+xvrepli.h $xr0, -513 -+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] -+ -+xvrepli.h $xr0, 512 -+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] -+ -+xvrepli.w $xr0, -513 -+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] -+ -+xvrepli.w $xr0, 512 -+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] -+ -+xvrepli.d $xr0, -513 -+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] -+ -+xvrepli.d $xr0, 512 -+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] -+ - ## simm11_lsl1 -+xvldrepl.h $xr0, $a0, -2050 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 2 in the range [-2048, 2046] -+ - xvldrepl.h $xr0, $a0, 2048 - # CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 2 in the range [-2048, 2046] - -+## simm12 -+xvldrepl.b $xr0, $a0, -2049 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [-2048, 2047] -+ -+xvldrepl.b $xr0, $a0, 2048 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [-2048, 2047] -+ -+xvst $xr0, $a0, -2049 -+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047] -+ -+xvst $xr0, $a0, 2048 -+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047] -+ -+xvld $xr0, $a0, -2049 -+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047] -+ -+xvld $xr0, $a0, 2048 -+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047] -+ - ## simm13 -+xvldi $xr0, -4097 -+# CHECK: :[[#@LINE-1]]:13: error: immediate must be an integer in the range [-4096, 4095] -+ - xvldi $xr0, 4096 - # CHECK: :[[#@LINE-1]]:13: error: immediate must be an integer in the range [-4096, 4095] --- -2.20.1 - - -From 73373a6158629eb02ed9fe0e540c21ffb84a549f Mon Sep 17 00:00:00 2001 -From: chenli -Date: Mon, 21 Aug 2023 11:03:49 +0800 -Subject: [PATCH 09/35] [LoongArch] Add testcases of LSX intrinsics with - immediates - -The testcases mainly cover three situations: -- the arguments which should 
be immediates are non immediates. -- the immediate is out of upper limit of the argument type. -- the immediate is out of lower limit of the argument type. - -Depends on D155829 - -Reviewed By: SixWeining - -Differential Revision: https://reviews.llvm.org/D157570 - -(cherry picked from commit 0c76f46ca676ebecbdf2c9f7e8b05421a234bbed) ---- - .../lsx/intrinsic-addi-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-addi-non-imm.ll | 37 +++++ - .../lsx/intrinsic-andi-invalid-imm.ll | 17 +++ - .../LoongArch/lsx/intrinsic-andi-non-imm.ll | 10 ++ - .../lsx/intrinsic-bitclr-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-bitclr-non-imm.ll | 37 +++++ - .../lsx/intrinsic-bitrev-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-bitrev-non-imm.ll | 37 +++++ - .../lsx/intrinsic-bitseli-invalid-imm.ll | 17 +++ - .../lsx/intrinsic-bitseli-non-imm.ll | 10 ++ - .../lsx/intrinsic-bitset-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-bitset-non-imm.ll | 37 +++++ - .../lsx/intrinsic-bsll-invalid-imm.ll | 17 +++ - .../LoongArch/lsx/intrinsic-bsll-non-imm.ll | 10 ++ - .../lsx/intrinsic-bsrl-invalid-imm.ll | 17 +++ - .../LoongArch/lsx/intrinsic-bsrl-non-imm.ll | 10 ++ - .../lsx/intrinsic-extrins-invalid-imm.ll | 65 +++++++++ - .../lsx/intrinsic-extrins-non-imm.ll | 37 +++++ - .../lsx/intrinsic-frstp-invalid-imm.ll | 33 +++++ - .../LoongArch/lsx/intrinsic-frstp-non-imm.ll | 19 +++ - .../lsx/intrinsic-insgr2vr-invalid-imm.ll | 65 +++++++++ - .../lsx/intrinsic-insgr2vr-non-imm.ll | 37 +++++ - .../LoongArch/lsx/intrinsic-ld-invalid-imm.ll | 17 +++ - .../LoongArch/lsx/intrinsic-ld-non-imm.ll | 10 ++ - .../lsx/intrinsic-ldi-invalid-imm.ll | 81 +++++++++++ - .../LoongArch/lsx/intrinsic-ldi-non-imm.ll | 46 +++++++ - .../lsx/intrinsic-ldrepl-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-ldrepl-non-imm.ll | 37 +++++ - .../lsx/intrinsic-max-invalid-imm.ll | 129 ++++++++++++++++++ - .../LoongArch/lsx/intrinsic-max-non-imm.ll | 73 ++++++++++ - .../lsx/intrinsic-min-invalid-imm.ll | 129 ++++++++++++++++++ - .../LoongArch/lsx/intrinsic-min-non-imm.ll | 73 ++++++++++ - .../lsx/intrinsic-nori-invalid-imm.ll | 17 +++ - .../LoongArch/lsx/intrinsic-nori-non-imm.ll | 10 ++ - .../lsx/intrinsic-ori-invalid-imm.ll | 17 +++ - .../LoongArch/lsx/intrinsic-ori-non-imm.ll | 10 ++ - .../lsx/intrinsic-permi-invalid-imm.ll | 17 +++ - .../LoongArch/lsx/intrinsic-permi-non-imm.ll | 10 ++ - .../lsx/intrinsic-pickve2gr-invalid-imm.ll | 129 ++++++++++++++++++ - .../lsx/intrinsic-pickve2gr-non-imm.ll | 73 ++++++++++ - .../lsx/intrinsic-replvei-invalid-imm.ll | 65 +++++++++ - .../lsx/intrinsic-replvei-non-imm.ll | 37 +++++ - .../lsx/intrinsic-rotr-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-rotr-non-imm.ll | 37 +++++ - .../lsx/intrinsic-sat-invalid-imm.ll | 129 ++++++++++++++++++ - .../LoongArch/lsx/intrinsic-sat-non-imm.ll | 73 ++++++++++ - .../lsx/intrinsic-seq-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-seq-non-imm.ll | 37 +++++ - .../lsx/intrinsic-shuf4i-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-shuf4i-non-imm.ll | 37 +++++ - .../lsx/intrinsic-sle-invalid-imm.ll | 129 ++++++++++++++++++ - .../LoongArch/lsx/intrinsic-sle-non-imm.ll | 73 ++++++++++ - .../lsx/intrinsic-sll-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-sll-non-imm.ll | 37 +++++ - .../lsx/intrinsic-sllwil-invalid-imm.ll | 97 +++++++++++++ - .../LoongArch/lsx/intrinsic-sllwil-non-imm.ll | 55 ++++++++ - .../lsx/intrinsic-slt-invalid-imm.ll | 129 ++++++++++++++++++ - 
.../LoongArch/lsx/intrinsic-slt-non-imm.ll | 73 ++++++++++ - .../lsx/intrinsic-sra-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-sra-non-imm.ll | 37 +++++ - .../lsx/intrinsic-srani-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-srani-non-imm.ll | 37 +++++ - .../lsx/intrinsic-srar-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-srar-non-imm.ll | 37 +++++ - .../lsx/intrinsic-srarni-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-srarni-non-imm.ll | 37 +++++ - .../lsx/intrinsic-srl-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-srl-non-imm.ll | 37 +++++ - .../lsx/intrinsic-srlni-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-srlni-non-imm.ll | 37 +++++ - .../lsx/intrinsic-srlr-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-srlr-non-imm.ll | 37 +++++ - .../lsx/intrinsic-srlrni-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-srlrni-non-imm.ll | 37 +++++ - .../lsx/intrinsic-ssrani-invalid-imm.ll | 129 ++++++++++++++++++ - .../LoongArch/lsx/intrinsic-ssrani-non-imm.ll | 73 ++++++++++ - .../lsx/intrinsic-ssrarni-invalid-imm.ll | 129 ++++++++++++++++++ - .../lsx/intrinsic-ssrarni-non-imm.ll | 73 ++++++++++ - .../lsx/intrinsic-ssrlni-invalid-imm.ll | 129 ++++++++++++++++++ - .../LoongArch/lsx/intrinsic-ssrlni-non-imm.ll | 73 ++++++++++ - .../lsx/intrinsic-ssrlrni-invalid-imm.ll | 129 ++++++++++++++++++ - .../lsx/intrinsic-ssrlrni-non-imm.ll | 73 ++++++++++ - .../LoongArch/lsx/intrinsic-st-invalid-imm.ll | 17 +++ - .../LoongArch/lsx/intrinsic-st-non-imm.ll | 10 ++ - .../lsx/intrinsic-stelm-invalid-imm.ll | 121 ++++++++++++++++ - .../LoongArch/lsx/intrinsic-stelm-non-imm.ll | 65 +++++++++ - .../lsx/intrinsic-subi-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-subi-non-imm.ll | 37 +++++ - .../lsx/intrinsic-xori-invalid-imm.ll | 17 +++ - .../LoongArch/lsx/intrinsic-xori-non-imm.ll | 10 ++ - 90 files changed, 4949 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-invalid-imm.ll - create mode 100644 
llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-non-imm.ll - create mode 100644 
llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-non-imm.ll - -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-invalid-imm.ll -new file mode 100644 -index 000000000000..6875872b6f83 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vaddi_bu_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vaddi.bu: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vaddi_bu_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vaddi.bu: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> %va, i32 32) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vaddi_hu_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vaddi.hu: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vaddi_hu_hi(<8 x i16> 
%va) nounwind { -+; CHECK: llvm.loongarch.lsx.vaddi.hu: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> %va, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vaddi_wu_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vaddi.wu: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vaddi_wu_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vaddi.wu: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> %va, i32 32) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vaddi_du_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vaddi.du: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vaddi_du_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vaddi.du: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> %va, i32 32) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-non-imm.ll -new file mode 100644 -index 000000000000..87d32b3ce02a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vaddi_bu(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vaddi_hu(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vaddi_wu(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vaddi_du(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-invalid-imm.ll -new file mode 100644 -index 000000000000..82a117b2aba5 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vandi_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vandi.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 
x i8> @lsx_vandi_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vandi.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> %va, i32 256) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-non-imm.ll -new file mode 100644 -index 000000000000..c0c35c775266 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vandi_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-invalid-imm.ll -new file mode 100644 -index 000000000000..b020806cd86c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbitclri_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitclri.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vbitclri_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitclri.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> %va, i32 8) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vbitclri_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitclri.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vbitclri_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitclri.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> %va, i32 16) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vbitclri_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitclri.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vbitclri_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitclri.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> %va, i32 32) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vbitclri_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitclri.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vbitclri_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitclri.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> %va, i32 64) -+ ret <2 x i64> %res -+} -diff --git 
a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-non-imm.ll -new file mode 100644 -index 000000000000..df6cdb99cdbc ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbitclri_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vbitclri_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vbitclri_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vbitclri_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-invalid-imm.ll -new file mode 100644 -index 000000000000..24b6ec3284cb ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbitrevi_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitrevi.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vbitrevi_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitrevi.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> %va, i32 8) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vbitrevi_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitrevi.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vbitrevi_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitrevi.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> %va, i32 16) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vbitrevi_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitrevi.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vbitrevi_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitrevi.w: argument out of range -+entry: -+ 
%res = call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> %va, i32 32) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vbitrevi_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitrevi.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vbitrevi_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitrevi.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> %va, i32 64) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-non-imm.ll -new file mode 100644 -index 000000000000..3ffb494c9907 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbitrevi_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vbitrevi_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vbitrevi_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vbitrevi_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-invalid-imm.ll -new file mode 100644 -index 000000000000..bc63b40e9fca ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbitseli_b_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitseli.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> %va, <16 x i8> %vb, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vbitseli_b_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitseli.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> %va, <16 x i8> %vb, i32 256) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-non-imm.ll -new file mode 100644 -index 000000000000..52c1eb7d2024 ---- /dev/null -+++ 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbitseli_b(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> %va, <16 x i8> %vb, i32 %c) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-invalid-imm.ll -new file mode 100644 -index 000000000000..e57e14d8cb07 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbitseti_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitseti.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vbitseti_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitseti.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> %va, i32 8) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vbitseti_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitseti.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vbitseti_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitseti.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> %va, i32 16) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vbitseti_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitseti.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vbitseti_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitseti.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> %va, i32 32) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vbitseti_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitseti.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vbitseti_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitseti.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> %va, i32 64) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-non-imm.ll -new file mode 100644 -index 000000000000..9b2bde015ed9 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> 
@llvm.loongarch.lsx.vbitseti.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbitseti_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vbitseti_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vbitseti_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vbitseti_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-invalid-imm.ll -new file mode 100644 -index 000000000000..eb49af49c9be ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbsll_v_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbsll.v: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vbsll_v_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbsll.v: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> %va, i32 32) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-non-imm.ll -new file mode 100644 -index 000000000000..5b10c9e91a4f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbsll_v(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-invalid-imm.ll -new file mode 100644 -index 000000000000..bf56822e2ef5 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbsrl_v_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbsrl.v: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vbsrl_v_hi(<16 x i8> %va) nounwind { -+; CHECK: 
llvm.loongarch.lsx.vbsrl.v: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> %va, i32 32) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-non-imm.ll -new file mode 100644 -index 000000000000..0bc038c869ce ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbsrl_v(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-invalid-imm.ll -new file mode 100644 -index 000000000000..7f94234ed603 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vextrins_b_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vextrins.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> %va, <16 x i8> %vb, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vextrins_b_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vextrins.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> %va, <16 x i8> %vb, i32 256) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vextrins_h_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vextrins.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> %va, <8 x i16> %vb, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vextrins_h_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vextrins.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> %va, <8 x i16> %vb, i32 256) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vextrins_w_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vextrins.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> %va, <4 x i32> %vb, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vextrins_w_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vextrins.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> %va, <4 x i32> %vb, i32 256) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vextrins_d_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vextrins.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> %va, <2 x i64> %vb, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vextrins_d_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: 
llvm.loongarch.lsx.vextrins.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> %va, <2 x i64> %vb, i32 256) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-non-imm.ll -new file mode 100644 -index 000000000000..e834002bb60b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vextrins_b(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> %va, <16 x i8> %vb, i32 %c) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vextrins_h(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> %va, <8 x i16> %vb, i32 %c) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vextrins_w(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> %va, <4 x i32> %vb, i32 %c) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vextrins_d(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> %va, <2 x i64> %vb, i32 %c) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-invalid-imm.ll -new file mode 100644 -index 000000000000..0184c855c9c1 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-invalid-imm.ll -@@ -0,0 +1,33 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vfrstpi_b_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vfrstpi.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> %va, <16 x i8> %vb, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vfrstpi_b_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vfrstpi.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> %va, <16 x i8> %vb, i32 32) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vfrstpi_h_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vfrstpi.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> %va, <8 x i16> %vb, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vfrstpi_h_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vfrstpi.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> %va, <8 x i16> %vb, i32 
32) -+ ret <8 x i16> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-non-imm.ll -new file mode 100644 -index 000000000000..9583f672a305 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-non-imm.ll -@@ -0,0 +1,19 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vfrstpi_b(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> %va, <16 x i8> %vb, i32 %c) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vfrstpi_h(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> %va, <8 x i16> %vb, i32 %c) -+ ret <8 x i16> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-invalid-imm.ll -new file mode 100644 -index 000000000000..3d4f84fb6e03 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8>, i32, i32) -+ -+define <16 x i8> @lsx_vinsgr2vr_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vinsgr2vr.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> %va, i32 1, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vinsgr2vr_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vinsgr2vr.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> %va, i32 1, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16>, i32, i32) -+ -+define <8 x i16> @lsx_vinsgr2vr_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vinsgr2vr.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> %va, i32 1, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vinsgr2vr_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vinsgr2vr.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> %va, i32 1, i32 8) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32>, i32, i32) -+ -+define <4 x i32> @lsx_vinsgr2vr_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vinsgr2vr.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> %va, i32 1, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vinsgr2vr_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vinsgr2vr.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> %va, i32 1, i32 4) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64>, i64, i32) -+ -+define <2 x i64> @lsx_vinsgr2vr_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vinsgr2vr.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> %va, i64 1, i32 -1) -+ ret <2 x 
i64> %res -+} -+ -+define <2 x i64> @lsx_vinsgr2vr_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vinsgr2vr.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> %va, i64 1, i32 2) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-non-imm.ll -new file mode 100644 -index 000000000000..2a4c2218de8c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8>, i32, i32) -+ -+define <16 x i8> @lsx_vinsgr2vr_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> %va, i32 1, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16>, i32, i32) -+ -+define <8 x i16> @lsx_vinsgr2vr_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> %va, i32 1, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32>, i32, i32) -+ -+define <4 x i32> @lsx_vinsgr2vr_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> %va, i32 1, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64>, i64, i32) -+ -+define <2 x i64> @lsx_vinsgr2vr_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> %va, i64 1, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-invalid-imm.ll -new file mode 100644 -index 000000000000..3aeb30ce66b4 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vld(i8*, i32) -+ -+define <16 x i8> @lsx_vld_lo(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vld: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vld(i8* %p, i32 -2049) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vld_hi(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vld: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vld(i8* %p, i32 2048) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-non-imm.ll -new file mode 100644 -index 000000000000..db6a0318d87a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vld(i8*, i32) -+ -+define <16 x i8> @lsx_vld(i8* %p, i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vld(i8* %p, i32 %a) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-invalid-imm.ll 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-invalid-imm.ll -new file mode 100644 -index 000000000000..57f6f8e81d91 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-invalid-imm.ll -@@ -0,0 +1,81 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <2 x i64> @llvm.loongarch.lsx.vldi(i32) -+ -+define <2 x i64> @lsx_vldi_lo() nounwind { -+; CHECK: llvm.loongarch.lsx.vldi: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vldi(i32 -4097) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vldi_hi() nounwind { -+; CHECK: llvm.loongarch.lsx.vldi: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vldi(i32 4096) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32) -+ -+define <16 x i8> @lsx_vrepli_b_lo() nounwind { -+; CHECK: llvm.loongarch.lsx.vrepli.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 -513) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vrepli_b_hi() nounwind { -+; CHECK: llvm.loongarch.lsx.vrepli.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 512) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32) -+ -+define <8 x i16> @lsx_vrepli_h_lo() nounwind { -+; CHECK: llvm.loongarch.lsx.vrepli.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 -513) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vrepli_h_hi() nounwind { -+; CHECK: llvm.loongarch.lsx.vrepli.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 512) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32) -+ -+define <4 x i32> @lsx_vrepli_w_lo() nounwind { -+; CHECK: llvm.loongarch.lsx.vrepli.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 -513) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vrepli_w_hi() nounwind { -+; CHECK: llvm.loongarch.lsx.vrepli.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 512) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32) -+ -+define <2 x i64> @lsx_vrepli_d_lo() nounwind { -+; CHECK: llvm.loongarch.lsx.vrepli.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 -513) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vrepli_d_hi() nounwind { -+; CHECK: llvm.loongarch.lsx.vrepli.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 512) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-non-imm.ll -new file mode 100644 -index 000000000000..a8f8278f8097 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-non-imm.ll -@@ -0,0 +1,46 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <2 x i64> @llvm.loongarch.lsx.vldi(i32) -+ -+define <2 x i64> @lsx_vldi(i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vldi(i32 %a) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32) -+ -+define <16 x i8> @lsx_vrepli_b(i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 %a) -+ ret <16 x i8> %res -+} 
-+ -+declare <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32) -+ -+define <8 x i16> @lsx_vrepli_h(i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 %a) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32) -+ -+define <4 x i32> @lsx_vrepli_w(i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 %a) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32) -+ -+define <2 x i64> @lsx_vrepli_d(i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 %a) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-invalid-imm.ll -new file mode 100644 -index 000000000000..cb640e1245da ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8*, i32) -+ -+define <16 x i8> @lsx_vldrepl_b_lo(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vldrepl.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8* %p, i32 -2049) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vldrepl_b_hi(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vldrepl.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8* %p, i32 2048) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8*, i32) -+ -+define <8 x i16> @lsx_vldrepl_h_lo(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vldrepl.h: argument out of range or not a multiple of 2. -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8* %p, i32 -2050) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vldrepl_h_hi(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vldrepl.h: argument out of range or not a multiple of 2. -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8* %p, i32 2048) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8*, i32) -+ -+define <4 x i32> @lsx_vldrepl_w_lo(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vldrepl.w: argument out of range or not a multiple of 4. -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8* %p, i32 -2052) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vldrepl_w_hi(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vldrepl.w: argument out of range or not a multiple of 4. -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8* %p, i32 2048) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8*, i32) -+ -+define <2 x i64> @lsx_vldrepl_d_lo(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vldrepl.d: argument out of range or not a multiple of 8. -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8* %p, i32 -2056) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vldrepl_d_hi(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vldrepl.d: argument out of range or not a multiple of 8. 
-+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8* %p, i32 2048) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-non-imm.ll -new file mode 100644 -index 000000000000..e60b21913c69 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8*, i32) -+ -+define <16 x i8> @lsx_vldrepl_b(i8* %p, i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8* %p, i32 %a) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8*, i32) -+ -+define <8 x i16> @lsx_vldrepl_h(i8* %p, i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8* %p, i32 %a) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8*, i32) -+ -+define <4 x i32> @lsx_vldrepl_w(i8* %p, i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8* %p, i32 %a) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8*, i32) -+ -+define <2 x i64> @lsx_vldrepl_d(i8* %p, i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8* %p, i32 %a) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-invalid-imm.ll -new file mode 100644 -index 000000000000..667ba32723fc ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vmaxi_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmaxi.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> %va, i32 -17) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vmaxi_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmaxi.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> %va, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vmaxi_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmaxi.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> %va, i32 -17) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vmaxi_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmaxi.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> %va, i32 16) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vmaxi_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmaxi.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> %va, i32 -17) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vmaxi_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmaxi.w: argument out of range -+entry: -+ %res = call <4 x i32> 
@llvm.loongarch.lsx.vmaxi.w(<4 x i32> %va, i32 16) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vmaxi_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmaxi.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> %va, i32 -17) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vmaxi_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmaxi.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> %va, i32 16) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vmaxi_bu_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmaxi.bu: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vmaxi_bu_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmaxi.bu: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> %va, i32 32) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vmaxi_hu_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmaxi.hu: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vmaxi_hu_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmaxi.hu: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> %va, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vmaxi_wu_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmaxi.wu: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vmaxi_wu_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmaxi.wu: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> %va, i32 32) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vmaxi_du_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmaxi.du: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vmaxi_du_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmaxi.du: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> %va, i32 32) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-non-imm.ll -new file mode 100644 -index 000000000000..34bbe3495670 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vmaxi_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16>, i32) -+ -+define <8 x i16> 
@lsx_vmaxi_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vmaxi_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vmaxi_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vmaxi_bu(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vmaxi_hu(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vmaxi_wu(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vmaxi_du(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-invalid-imm.ll -new file mode 100644 -index 000000000000..b73bada4f06f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vmini_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmini.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> %va, i32 -17) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vmini_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmini.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> %va, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vmini_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmini.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> %va, i32 -17) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vmini_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmini.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> %va, i32 16) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32>, i32) -+ 
-+define <4 x i32> @lsx_vmini_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmini.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> %va, i32 -17) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vmini_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmini.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> %va, i32 16) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vmini_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmini.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> %va, i32 -17) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vmini_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmini.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> %va, i32 16) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vmini_bu_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmini.bu: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vmini_bu_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmini.bu: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> %va, i32 32) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vmini_hu_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmini.hu: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vmini_hu_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmini.hu: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> %va, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vmini_wu_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmini.wu: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vmini_wu_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmini.wu: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> %va, i32 32) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vmini_du_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmini.du: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vmini_du_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmini.du: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> %va, i32 32) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-non-imm.ll -new file mode 100644 -index 000000000000..5d9b98cec4d0 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s 
-+ -+declare <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vmini_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vmini_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vmini_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vmini_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vmini_bu(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vmini_hu(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vmini_wu(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vmini_du(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-invalid-imm.ll -new file mode 100644 -index 000000000000..8c59d8fb9fa5 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vnori_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vnori.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vnori_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vnori.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> %va, i32 256) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-non-imm.ll -new file mode 100644 -index 000000000000..322a39c106a6 ---- /dev/null -+++ 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vnori_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-invalid-imm.ll -new file mode 100644 -index 000000000000..4a7fc7e109d9 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vori_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vori.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vori_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vori.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> %va, i32 256) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-non-imm.ll -new file mode 100644 -index 000000000000..5644b8581dce ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vori_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-invalid-imm.ll -new file mode 100644 -index 000000000000..e439bbae6130 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vpermi_w_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vpermi.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> %va, <4 x i32> %vb, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vpermi_w_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vpermi.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> %va, <4 x i32> %vb, i32 256) -+ ret <4 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-non-imm.ll -new file mode 100644 -index 000000000000..bdfc08ed680a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vpermi_w(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { -+; 
CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> %va, <4 x i32> %vb, i32 %c) -+ ret <4 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-invalid-imm.ll -new file mode 100644 -index 000000000000..3430c54d2194 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8>, i32) -+ -+define i32 @lsx_vpickve2gr_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vpickve2gr.b: argument out of range -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> %va, i32 -1) -+ ret i32 %res -+} -+ -+define i32 @lsx_vpickve2gr_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vpickve2gr.b: argument out of range -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> %va, i32 16) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16>, i32) -+ -+define i32 @lsx_vpickve2gr_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vpickve2gr.h: argument out of range -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> %va, i32 -1) -+ ret i32 %res -+} -+ -+define i32 @lsx_vpickve2gr_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vpickve2gr.h: argument out of range -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> %va, i32 8) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32>, i32) -+ -+define i32 @lsx_vpickve2gr_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vpickve2gr.w: argument out of range -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> %va, i32 -1) -+ ret i32 %res -+} -+ -+define i32 @lsx_vpickve2gr_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vpickve2gr.w: argument out of range -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> %va, i32 4) -+ ret i32 %res -+} -+ -+declare i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64>, i32) -+ -+define i64 @lsx_vpickve2gr_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vpickve2gr.d: argument out of range -+entry: -+ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> %va, i32 -1) -+ ret i64 %res -+} -+ -+define i64 @lsx_vpickve2gr_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vpickve2gr.d: argument out of range -+entry: -+ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> %va, i32 2) -+ ret i64 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8>, i32) -+ -+define i32 @lsx_vpickve2gr_bu_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vpickve2gr.bu: argument out of range -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> %va, i32 -1) -+ ret i32 %res -+} -+ -+define i32 @lsx_vpickve2gr_bu_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vpickve2gr.bu: argument out of range -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> %va, i32 16) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16>, i32) -+ -+define i32 @lsx_vpickve2gr_hu_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vpickve2gr.hu: argument out of range -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> %va, i32 -1) -+ ret i32 %res -+} -+ -+define i32 
@lsx_vpickve2gr_hu_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vpickve2gr.hu: argument out of range -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> %va, i32 8) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32>, i32) -+ -+define i32 @lsx_vpickve2gr_wu_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vpickve2gr.wu: argument out of range -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> %va, i32 -1) -+ ret i32 %res -+} -+ -+define i32 @lsx_vpickve2gr_wu_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vpickve2gr.wu: argument out of range -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> %va, i32 4) -+ ret i32 %res -+} -+ -+declare i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64>, i32) -+ -+define i64 @lsx_vpickve2gr_du_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vpickve2gr.du: argument out of range -+entry: -+ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> %va, i32 -1) -+ ret i64 %res -+} -+ -+define i64 @lsx_vpickve2gr_du_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vpickve2gr.du: argument out of range -+entry: -+ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> %va, i32 2) -+ ret i64 %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-non-imm.ll -new file mode 100644 -index 000000000000..6dd3c1f27a81 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8>, i32) -+ -+define i32 @lsx_vpickve2gr_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> %va, i32 %b) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16>, i32) -+ -+define i32 @lsx_vpickve2gr_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> %va, i32 %b) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32>, i32) -+ -+define i32 @lsx_vpickve2gr_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> %va, i32 %b) -+ ret i32 %res -+} -+ -+declare i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64>, i32) -+ -+define i64 @lsx_vpickve2gr_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> %va, i32 %b) -+ ret i64 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8>, i32) -+ -+define i32 @lsx_vpickve2gr_bu(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> %va, i32 %b) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16>, i32) -+ -+define i32 @lsx_vpickve2gr_hu(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> %va, i32 %b) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32>, i32) -+ -+define i32 @lsx_vpickve2gr_wu(<4 x i32> %va, i32 
%b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> %va, i32 %b) -+ ret i32 %res -+} -+ -+declare i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64>, i32) -+ -+define i64 @lsx_vpickve2gr_du(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> %va, i32 %b) -+ ret i64 %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-invalid-imm.ll -new file mode 100644 -index 000000000000..d625441122a6 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vreplvei_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vreplvei.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vreplvei_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vreplvei.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> %va, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vreplvei_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vreplvei.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vreplvei_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vreplvei.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> %va, i32 8) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vreplvei_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vreplvei.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vreplvei_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vreplvei.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> %va, i32 4) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vreplvei_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vreplvei.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vreplvei_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vreplvei.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> %va, i32 2) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-non-imm.ll -new file mode 100644 -index 000000000000..3d271bb2b307 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8>, i32) -+ -+define <16 x i8> 
@lsx_vreplvei_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vreplvei_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vreplvei_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vreplvei_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-invalid-imm.ll -new file mode 100644 -index 000000000000..3c53b36672ad ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vrotri_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vrotri.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vrotri_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vrotri.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> %va, i32 8) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vrotri_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vrotri.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vrotri_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vrotri.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> %va, i32 16) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vrotri_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vrotri.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vrotri_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vrotri.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> %va, i32 32) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vrotri_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vrotri.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vrotri_d_hi(<2 x i64> %va) nounwind { -+; CHECK: 
llvm.loongarch.lsx.vrotri.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> %va, i32 64) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-non-imm.ll -new file mode 100644 -index 000000000000..fd8ba3a1c633 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vrotri_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vrotri_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vrotri_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vrotri_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-invalid-imm.ll -new file mode 100644 -index 000000000000..45fa4e43be19 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsat_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsat.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vsat_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsat.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> %va, i32 8) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsat_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsat.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vsat_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsat.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> %va, i32 16) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsat_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsat.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vsat_w_hi(<4 x i32> %va) nounwind { -+; CHECK: 
llvm.loongarch.lsx.vsat.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> %va, i32 32) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsat_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsat.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vsat_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsat.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> %va, i32 64) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsat_bu_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsat.bu: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vsat_bu_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsat.bu: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> %va, i32 8) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsat_hu_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsat.hu: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vsat_hu_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsat.hu: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> %va, i32 16) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsat_wu_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsat.wu: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vsat_wu_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsat.wu: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> %va, i32 32) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsat_du_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsat.du: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vsat_du_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsat.du: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> %va, i32 64) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-non-imm.ll -new file mode 100644 -index 000000000000..afdbe0c1ce0b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsat_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> 
@llvm.loongarch.lsx.vsat.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsat_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsat_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsat_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsat_bu(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsat_hu(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsat_wu(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsat_du(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-invalid-imm.ll -new file mode 100644 -index 000000000000..220398ff28cd ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vseqi_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vseqi.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> %va, i32 -17) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vseqi_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vseqi.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> %va, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vseqi_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vseqi.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> %va, i32 -17) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vseqi_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vseqi.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> %va, i32 16) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> 
@llvm.loongarch.lsx.vseqi.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vseqi_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vseqi.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> %va, i32 -17) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vseqi_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vseqi.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> %va, i32 16) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vseqi_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vseqi.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> %va, i32 -17) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vseqi_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vseqi.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> %va, i32 16) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-non-imm.ll -new file mode 100644 -index 000000000000..5fa1dd30475c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vseqi_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vseqi_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vseqi_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vseqi_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-invalid-imm.ll -new file mode 100644 -index 000000000000..4d6fadf08c26 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vshuf4i_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vshuf4i.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vshuf4i_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vshuf4i.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> %va, i32 256) -+ ret <16 x i8> %res 
-+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vshuf4i_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vshuf4i.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vshuf4i_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vshuf4i.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> %va, i32 256) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vshuf4i_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vshuf4i.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vshuf4i_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vshuf4i.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> %va, i32 256) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vshuf4i_d_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vshuf4i.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> %va, <2 x i64> %vb, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vshuf4i_d_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vshuf4i.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> %va, <2 x i64> %vb, i32 256) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-non-imm.ll -new file mode 100644 -index 000000000000..a7d138bcc00b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vshuf4i_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vshuf4i_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vshuf4i_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vshuf4i_d(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> %va, <2 x i64> %vb, i32 %c) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-invalid-imm.ll -new file mode 100644 -index 
000000000000..4c945e296711 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vslei_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslei.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> %va, i32 -17) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vslei_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslei.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> %va, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vslei_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslei.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> %va, i32 -17) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vslei_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslei.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> %va, i32 16) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vslei_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslei.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> %va, i32 -17) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vslei_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslei.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> %va, i32 16) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vslei_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslei.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> %va, i32 -17) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vslei_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslei.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> %va, i32 16) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vslei_bu_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslei.bu: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vslei_bu_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslei.bu: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> %va, i32 32) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vslei_hu_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslei.hu: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vslei_hu_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslei.hu: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> %va, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32>, i32) -+ -+define <4 x i32> 
@lsx_vslei_wu_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslei.wu: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vslei_wu_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslei.wu: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> %va, i32 32) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vslei_du_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslei.du: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vslei_du_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslei.du: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> %va, i32 32) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-non-imm.ll -new file mode 100644 -index 000000000000..0fc137bf0549 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vslei_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vslei_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vslei_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vslei_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vslei_bu(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vslei_hu(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vslei_wu(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64>, i32) -+ 
-+define <2 x i64> @lsx_vslei_du(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-invalid-imm.ll -new file mode 100644 -index 000000000000..75406f94887c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vslli_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslli.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vslli_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslli.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> %va, i32 8) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vslli_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslli.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vslli_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslli.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> %va, i32 16) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vslli_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslli.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vslli_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslli.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> %va, i32 32) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vslli_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslli.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vslli_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslli.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> %va, i32 64) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-non-imm.ll -new file mode 100644 -index 000000000000..7474b5e29734 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vslli_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vslli_h(<8 x i16> %va, i32 %b) nounwind 
{ -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vslli_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vslli_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-invalid-imm.ll -new file mode 100644 -index 000000000000..bda3523a0b5c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-invalid-imm.ll -@@ -0,0 +1,97 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8>, i32) -+ -+define <8 x i16> @lsx_vsllwil_h_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsllwil.h.b: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vsllwil_h_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsllwil.h.b: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> %va, i32 8) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16>, i32) -+ -+define <4 x i32> @lsx_vsllwil_w_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsllwil.w.h: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vsllwil_w_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsllwil.w.h: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> %va, i32 16) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32>, i32) -+ -+define <2 x i64> @lsx_vsllwil_d_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsllwil.d.w: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vsllwil_d_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsllwil.d.w: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> %va, i32 32) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8>, i32) -+ -+define <8 x i16> @lsx_vsllwil_hu_bu_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsllwil.hu.bu: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vsllwil_hu_bu_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsllwil.hu.bu: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> %va, i32 8) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16>, i32) -+ -+define <4 x i32> @lsx_vsllwil_wu_hu_lo(<8 x 
i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsllwil.wu.hu: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vsllwil_wu_hu_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsllwil.wu.hu: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> %va, i32 16) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32>, i32) -+ -+define <2 x i64> @lsx_vsllwil_du_wu_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsllwil.du.wu: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vsllwil_du_wu_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsllwil.du.wu: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> %va, i32 32) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-non-imm.ll -new file mode 100644 -index 000000000000..a03656d5ca07 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-non-imm.ll -@@ -0,0 +1,55 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8>, i32) -+ -+define <8 x i16> @lsx_vsllwil_h_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16>, i32) -+ -+define <4 x i32> @lsx_vsllwil_w_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32>, i32) -+ -+define <2 x i64> @lsx_vsllwil_d_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> %va, i32 %b) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8>, i32) -+ -+define <8 x i16> @lsx_vsllwil_hu_bu(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16>, i32) -+ -+define <4 x i32> @lsx_vsllwil_wu_hu(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32>, i32) -+ -+define <2 x i64> @lsx_vsllwil_du_wu(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-invalid-imm.ll -new file mode 100644 -index 000000000000..f6d014b19d6c ---- /dev/null -+++ 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vslti_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslti.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> %va, i32 -17) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vslti_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslti.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> %va, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vslti_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslti.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> %va, i32 -17) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vslti_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslti.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> %va, i32 16) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vslti_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslti.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> %va, i32 -17) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vslti_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslti.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> %va, i32 16) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vslti_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslti.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> %va, i32 -17) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vslti_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslti.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> %va, i32 16) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vslti_bu_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslti.bu: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vslti_bu_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslti.bu: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> %va, i32 32) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vslti_hu_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslti.hu: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vslti_hu_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslti.hu: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> %va, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vslti_wu_lo(<4 x i32> %va) nounwind { -+; CHECK: 
llvm.loongarch.lsx.vslti.wu: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vslti_wu_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslti.wu: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> %va, i32 32) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vslti_du_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslti.du: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vslti_du_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslti.du: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> %va, i32 32) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-non-imm.ll -new file mode 100644 -index 000000000000..9a8b757dab4e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vslti_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vslti_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vslti_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vslti_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vslti_bu(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vslti_hu(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vslti_wu(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vslti_du(<2 x i64> %va, i32 %b) 
nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-invalid-imm.ll -new file mode 100644 -index 000000000000..2a033a21b565 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrai_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrai.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vsrai_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrai.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> %va, i32 8) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrai_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrai.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vsrai_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrai.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> %va, i32 16) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrai_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrai.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vsrai_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrai.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> %va, i32 32) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrai_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrai.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vsrai_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrai.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> %va, i32 64) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-non-imm.ll -new file mode 100644 -index 000000000000..c3b328145864 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrai_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrai_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter 
-+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrai_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrai_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-invalid-imm.ll -new file mode 100644 -index 000000000000..d68064e9b902 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrani_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrani.b.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vsrani_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrani.b.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrani_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrani.h.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vsrani_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrani.h.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrani_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrani.w.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vsrani_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrani.w.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrani_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrani.d.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vsrani_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrani.d.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 
x i64> %va, <2 x i64> %vb, i32 128) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-non-imm.ll -new file mode 100644 -index 000000000000..38cfde214dc1 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrani_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrani_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrani_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrani_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-invalid-imm.ll -new file mode 100644 -index 000000000000..b6c2d70cebbc ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrari_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrari.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vsrari_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrari.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> %va, i32 8) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrari_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrari.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vsrari_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrari.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> %va, i32 16) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrari_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrari.w: argument out of range -+entry: -+ %res = call <4 x i32> 
@llvm.loongarch.lsx.vsrari.w(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vsrari_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrari.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> %va, i32 32) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrari_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrari.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vsrari_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrari.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> %va, i32 64) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-non-imm.ll -new file mode 100644 -index 000000000000..2ad8adcd823b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrari_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrari_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrari_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrari_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-invalid-imm.ll -new file mode 100644 -index 000000000000..d24cf92a0392 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrarni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrarni.b.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vsrarni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrarni.b.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define 
<8 x i16> @lsx_vsrarni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrarni.h.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vsrarni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrarni.h.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrarni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrarni.w.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vsrarni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrarni.w.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrarni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrarni.d.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vsrarni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrarni.d.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-non-imm.ll -new file mode 100644 -index 000000000000..19de7445cba1 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrarni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrarni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrarni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrarni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> 
%va, <2 x i64> %vb, i32 %c) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-invalid-imm.ll -new file mode 100644 -index 000000000000..3beff790afab ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrli_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrli.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vsrli_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrli.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> %va, i32 8) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrli_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrli.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vsrli_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrli.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> %va, i32 16) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrli_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrli.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vsrli_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrli.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> %va, i32 32) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrli_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrli.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vsrli_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrli.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> %va, i32 64) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-non-imm.ll -new file mode 100644 -index 000000000000..98652aca0d62 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrli_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrli_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ 
-+declare <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrli_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrli_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-invalid-imm.ll -new file mode 100644 -index 000000000000..054c4f393548 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrlni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlni.b.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vsrlni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlni.b.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrlni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlni.h.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vsrlni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlni.h.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrlni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlni.w.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vsrlni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlni.w.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrlni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlni.d.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vsrlni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlni.d.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) -+ ret <2 x i64> %res -+} -diff --git 
a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-non-imm.ll -new file mode 100644 -index 000000000000..76341df197fd ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrlni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrlni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrlni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrlni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-invalid-imm.ll -new file mode 100644 -index 000000000000..bcbd38e26e5f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrlri_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlri.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vsrlri_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlri.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> %va, i32 8) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrlri_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlri.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vsrlri_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlri.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> %va, i32 16) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrlri_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlri.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> 
@lsx_vsrlri_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlri.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> %va, i32 32) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrlri_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlri.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vsrlri_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlri.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> %va, i32 64) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-non-imm.ll -new file mode 100644 -index 000000000000..4862b1546ccf ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrlri_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrlri_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrlri_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrlri_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-invalid-imm.ll -new file mode 100644 -index 000000000000..8988ae88f9eb ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrlrni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlrni.b.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vsrlrni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlrni.b.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrlrni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: 
llvm.loongarch.lsx.vsrlrni.h.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vsrlrni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlrni.h.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrlrni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlrni.w.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vsrlrni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlrni.w.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrlrni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlrni.d.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vsrlrni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlrni.d.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-non-imm.ll -new file mode 100644 -index 000000000000..e5530db56fed ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrlrni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrlrni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrlrni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrlrni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) -+ ret <2 x i64> %res -+} -diff --git 
a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-invalid-imm.ll -new file mode 100644 -index 000000000000..f7817921ebeb ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrani_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrani.b.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vssrani_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrani.b.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrani_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrani.h.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vssrani_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrani.h.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrani_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrani.w.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vssrani_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrani.w.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrani_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrani.d.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vssrani_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrani.d.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrani_bu_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrani.bu.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vssrani_bu_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrani.bu.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 16) -+ ret <16 x 
i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrani_hu_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrani.hu.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vssrani_hu_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrani.hu.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrani_wu_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrani.wu.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vssrani_wu_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrani.wu.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 64) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrani_du_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrani.du.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vssrani_du_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrani.du.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> %va, <2 x i64> %vb, i32 128) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-non-imm.ll -new file mode 100644 -index 000000000000..a80ede9c5243 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrani_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrani_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrani_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrani_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { -+; 
CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrani_bu_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrani_hu_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrani_wu_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrani_du_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-invalid-imm.ll -new file mode 100644 -index 000000000000..4edda8c0a24a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrarni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrarni.b.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vssrarni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrarni.b.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrarni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrarni.h.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vssrarni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrarni.h.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrarni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrarni.w.d: argument out of range -+entry: -+ %res = call <4 x i32> 
@llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vssrarni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrarni.w.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrarni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrarni.d.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vssrarni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrarni.d.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrarni_bu_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrarni.bu.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vssrarni_bu_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrarni.bu.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrarni_hu_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrarni.hu.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vssrarni_hu_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrarni.hu.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrarni_wu_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrarni.wu.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vssrarni_wu_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrarni.wu.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 64) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrarni_du_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrarni.du.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vssrarni_du_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrarni.du.q: argument out of range -+entry: -+ %res = call <2 x i64> 
@llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 128) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-non-imm.ll -new file mode 100644 -index 000000000000..a77e6e764c9d ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrarni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrarni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrarni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrarni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrarni_bu_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrarni_hu_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrarni_wu_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrarni_du_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-invalid-imm.ll -new file mode 100644 -index 000000000000..6218af1fa773 ---- /dev/null 
-+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrlni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlni.b.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vssrlni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlni.b.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrlni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlni.h.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vssrlni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlni.h.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrlni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlni.w.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vssrlni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlni.w.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrlni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlni.d.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vssrlni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlni.d.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrlni_bu_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlni.bu.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vssrlni_bu_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlni.bu.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrlni_hu_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: 
llvm.loongarch.lsx.vssrlni.hu.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vssrlni_hu_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlni.hu.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrlni_wu_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlni.wu.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vssrlni_wu_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlni.wu.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 64) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrlni_du_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlni.du.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vssrlni_du_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlni.du.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 128) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-non-imm.ll -new file mode 100644 -index 000000000000..688be826f467 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrlni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrlni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrlni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrlni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) -+ ret <2 x i64> %res -+} -+ 
-+declare <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrlni_bu_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrlni_hu_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrlni_wu_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrlni_du_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-invalid-imm.ll -new file mode 100644 -index 000000000000..98a0c5b3cd28 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrlrni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlrni.b.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vssrlrni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlrni.b.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrlrni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlrni.h.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vssrlrni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlrni.h.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrlrni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlrni.w.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vssrlrni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; 
CHECK: llvm.loongarch.lsx.vssrlrni.w.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrlrni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlrni.d.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vssrlrni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlrni.d.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrlrni_bu_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlrni.bu.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vssrlrni_bu_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlrni.bu.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrlrni_hu_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlrni.hu.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vssrlrni_hu_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlrni.hu.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrlrni_wu_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlrni.wu.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vssrlrni_wu_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlrni.wu.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 64) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrlrni_du_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlrni.du.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vssrlrni_du_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlrni.du.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 128) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-non-imm.ll 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-non-imm.ll -new file mode 100644 -index 000000000000..c389b4fd6023 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrlrni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrlrni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrlrni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrlrni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrlrni_bu_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrlrni_hu_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrlrni_wu_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrlrni_du_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-invalid-imm.ll -new file mode 100644 -index 000000000000..64518380964b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare void 
@llvm.loongarch.lsx.vst(<16 x i8>, i8*, i32) -+ -+define void @lsx_vst_lo(<16 x i8> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vst: argument out of range -+entry: -+ call void @llvm.loongarch.lsx.vst(<16 x i8> %va, i8* %p, i32 -2049) -+ ret void -+} -+ -+define void @lsx_vst_hi(<16 x i8> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vst: argument out of range -+entry: -+ call void @llvm.loongarch.lsx.vst(<16 x i8> %va, i8* %p, i32 2048) -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-non-imm.ll -new file mode 100644 -index 000000000000..119ed9b78658 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare void @llvm.loongarch.lsx.vst(<16 x i8>, i8*, i32) -+ -+define void @lsx_vst(<16 x i8> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lsx.vst(<16 x i8> %va, i8* %p, i32 %b) -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-invalid-imm.ll -new file mode 100644 -index 000000000000..277abcbd34cc ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-invalid-imm.ll -@@ -0,0 +1,121 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare void @llvm.loongarch.lsx.vstelm.b(<16 x i8>, i8*, i32, i32) -+ -+define void @lsx_vstelm_b_lo(<16 x i8> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vstelm.b: argument out of range -+entry: -+ call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 -129, i32 15) -+ ret void -+} -+ -+define void @lsx_vstelm_b_hi(<16 x i8> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vstelm.b: argument out of range -+entry: -+ call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 128, i32 15) -+ ret void -+} -+ -+define void @lsx_vstelm_b_idx_lo(<16 x i8> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vstelm.b: argument out of range -+entry: -+ call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 1, i32 -1) -+ ret void -+} -+ -+define void @lsx_vstelm_b_idx_hi(<16 x i8> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vstelm.b: argument out of range -+entry: -+ call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 1, i32 16) -+ ret void -+} -+ -+declare void @llvm.loongarch.lsx.vstelm.h(<8 x i16>, i8*, i32, i32) -+ -+define void @lsx_vstelm_h_lo(<8 x i16> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vstelm.h: argument out of range or not a multiple of 2. -+entry: -+ call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 -258, i32 7) -+ ret void -+} -+ -+define void @lsx_vstelm_h_hi(<8 x i16> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vstelm.h: argument out of range or not a multiple of 2. -+entry: -+ call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 256, i32 7) -+ ret void -+} -+ -+define void @lsx_vstelm_h_idx_lo(<8 x i16> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vstelm.h: argument out of range or not a multiple of 2. -+entry: -+ call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 2, i32 -1) -+ ret void -+} -+ -+define void @lsx_vstelm_h_idx_hi(<8 x i16> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vstelm.h: argument out of range or not a multiple of 2. 
-+entry: -+ call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 2, i32 8) -+ ret void -+} -+ -+declare void @llvm.loongarch.lsx.vstelm.w(<4 x i32>, i8*, i32, i32) -+ -+define void @lsx_vstelm_w_lo(<4 x i32> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vstelm.w: argument out of range or not a multiple of 4. -+entry: -+ call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 -516, i32 3) -+ ret void -+} -+ -+define void @lsx_vstelm_w_hi(<4 x i32> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vstelm.w: argument out of range or not a multiple of 4. -+entry: -+ call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 512, i32 3) -+ ret void -+} -+ -+define void @lsx_vstelm_w_idx_lo(<4 x i32> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vstelm.w: argument out of range or not a multiple of 4. -+entry: -+ call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 4, i32 -1) -+ ret void -+} -+ -+define void @lsx_vstelm_w_idx_hi(<4 x i32> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vstelm.w: argument out of range or not a multiple of 4. -+entry: -+ call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 4, i32 4) -+ ret void -+} -+ -+declare void @llvm.loongarch.lsx.vstelm.d(<2 x i64>, i8*, i32, i32) -+ -+define void @lsx_vstelm_d_lo(<2 x i64> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vstelm.d: argument out of range or not a multiple of 8. -+entry: -+ call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 -1032, i32 1) -+ ret void -+} -+ -+define void @lsx_vstelm_d_hi(<2 x i64> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vstelm.d: argument out of range or not a multiple of 8. -+entry: -+ call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 1024, i32 1) -+ ret void -+} -+ -+define void @lsx_vstelm_d_idx_lo(<2 x i64> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vstelm.d: argument out of range or not a multiple of 8. -+entry: -+ call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 8, i32 -1) -+ ret void -+} -+ -+define void @lsx_vstelm_d_idx_hi(<2 x i64> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vstelm.d: argument out of range or not a multiple of 8. 
-+entry: -+ call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 8, i32 2) -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-non-imm.ll -new file mode 100644 -index 000000000000..f53932f79035 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-non-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare void @llvm.loongarch.lsx.vstelm.b(<16 x i8>, i8*, i32, i32) -+ -+define void @lsx_vstelm_b(<16 x i8> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 %b, i32 1) -+ ret void -+} -+ -+define void @lsx_vstelm_b_idx(<16 x i8> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 1, i32 %b) -+ ret void -+} -+ -+declare void @llvm.loongarch.lsx.vstelm.h(<8 x i16>, i8*, i32, i32) -+ -+define void @lsx_vstelm_h(<8 x i16> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 %b, i32 1) -+ ret void -+} -+ -+define void @lsx_vstelm_h_idx(<8 x i16> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 2, i32 %b) -+ ret void -+} -+ -+declare void @llvm.loongarch.lsx.vstelm.w(<4 x i32>, i8*, i32, i32) -+ -+define void @lsx_vstelm_w(<4 x i32> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 %b, i32 1) -+ ret void -+} -+ -+define void @lsx_vstelm_w_idx(<4 x i32> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 4, i32 %b) -+ ret void -+} -+ -+declare void @llvm.loongarch.lsx.vstelm.d(<2 x i64>, i8*, i32, i32) -+ -+define void @lsx_vstelm_d(<2 x i64> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 %b, i32 1) -+ ret void -+} -+ -+define void @lsx_vstelm_d_idx(<2 x i64> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 8, i32 %b) -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-invalid-imm.ll -new file mode 100644 -index 000000000000..96cc1241fbf3 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsubi_bu_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsubi.bu: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vsubi_bu_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsubi.bu: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> %va, i32 32) -+ ret <16 x i8> %res -+} 
-+ -+declare <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsubi_hu_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsubi.hu: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vsubi_hu_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsubi.hu: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> %va, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsubi_wu_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsubi.wu: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vsubi_wu_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsubi.wu: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> %va, i32 32) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsubi_du_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsubi.du: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vsubi_du_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsubi.du: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> %va, i32 32) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-non-imm.ll -new file mode 100644 -index 000000000000..162f9ad131c7 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsubi_bu(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsubi_hu(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsubi_wu(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsubi_du(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-invalid-imm.ll -new file mode 100644 -index 000000000000..5f5613189ac8 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 
--mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vxori_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vxori.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vxori_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vxori.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> %va, i32 256) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-non-imm.ll -new file mode 100644 -index 000000000000..4238d89120f1 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vxori_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} --- -2.20.1 - - -From 270c5590dbdb266e6df5bec97b38594824d9815a Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Wed, 23 Aug 2023 15:28:00 +0800 -Subject: [PATCH 10/35] [LoongArch] Reorder LoongArchTargetLowering(). NFC - -(cherry picked from commit 3693909ca47f1fafc97b441c91f5656acdd3907c) - -[LoongArch] Fix Subtarget.is64Bit - -(cherry picked from commit 749f36dae311000e1d69351707f4f24a72090c94) ---- - .../LoongArch/LoongArchISelLowering.cpp | 152 ++++++++++-------- - 1 file changed, 82 insertions(+), 70 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index 3a40cd06a3eb..2f8ce57d3f5f 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -47,20 +47,14 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - : TargetLowering(TM), Subtarget(STI) { - - MVT GRLenVT = Subtarget.getGRLenVT(); -+ - // Set up the register classes. -+ - addRegisterClass(GRLenVT, &LoongArch::GPRRegClass); - if (Subtarget.hasBasicF()) - addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass); - if (Subtarget.hasBasicD()) - addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass); -- if (Subtarget.hasExtLSX()) -- for (auto VT : {MVT::v4f32, MVT::v2f64, MVT::v16i8, MVT::v8i16, MVT::v4i32, -- MVT::v2i64}) -- addRegisterClass(VT, &LoongArch::LSX128RegClass); -- if (Subtarget.hasExtLASX()) -- for (auto VT : {MVT::v8f32, MVT::v4f64, MVT::v32i8, MVT::v16i16, MVT::v8i32, -- MVT::v4i64}) -- addRegisterClass(VT, &LoongArch::LASX256RegClass); - - static const MVT::SimpleValueType LSXVTs[] = { - MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64}; -@@ -75,38 +69,57 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - for (MVT VT : LASXVTs) - addRegisterClass(VT, &LoongArch::LASX256RegClass); - -+ // Set operations for LA32 and LA64. -+ - setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT, - MVT::i1, Promote); - -- // TODO: add necessary setOperationAction calls later. 
- setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom); - setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom); - setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom); - setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom); - setOperationAction(ISD::ROTL, GRLenVT, Expand); - setOperationAction(ISD::CTPOP, GRLenVT, Expand); -- setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); -- setOperationAction(ISD::TRAP, MVT::Other, Legal); -- setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); -- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); - - setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool, -- ISD::JumpTable}, -+ ISD::JumpTable, ISD::GlobalTLSAddress}, - GRLenVT, Custom); - -- setOperationAction(ISD::GlobalTLSAddress, GRLenVT, Custom); -- -- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); -- -- setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); -- if (Subtarget.is64Bit()) -- setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom); -+ setOperationAction(ISD::EH_DWARF_CFA, GRLenVT, Custom); - - setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand); - setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand); - setOperationAction(ISD::VASTART, MVT::Other, Custom); - setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand); - -+ setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); -+ setOperationAction(ISD::TRAP, MVT::Other, Legal); -+ -+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); -+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); -+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); -+ -+ // Expand bitreverse.i16 with native-width bitrev and shift for now, before -+ // we get to know which of sll and revb.2h is faster. -+ setOperationAction(ISD::BITREVERSE, MVT::i8, Custom); -+ setOperationAction(ISD::BITREVERSE, GRLenVT, Legal); -+ -+ // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and -+ // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16 -+ // and i32 could still be byte-swapped relatively cheaply. -+ setOperationAction(ISD::BSWAP, MVT::i16, Custom); -+ -+ setOperationAction(ISD::BR_JT, MVT::Other, Expand); -+ setOperationAction(ISD::BR_CC, GRLenVT, Expand); -+ setOperationAction(ISD::SELECT_CC, GRLenVT, Expand); -+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); -+ setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand); -+ -+ setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom); -+ setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand); -+ -+ // Set operations for LA64 only. 
-+ - if (Subtarget.is64Bit()) { - setOperationAction(ISD::SHL, MVT::i32, Custom); - setOperationAction(ISD::SRA, MVT::i32, Custom); -@@ -117,50 +130,39 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction(ISD::ROTL, MVT::i32, Custom); - setOperationAction(ISD::CTTZ, MVT::i32, Custom); - setOperationAction(ISD::CTLZ, MVT::i32, Custom); -- setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom); -- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom); -- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); -+ setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); - setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom); - setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom); -+ setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom); -- if (Subtarget.hasBasicF() && !Subtarget.hasBasicD()) -- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); -- if (Subtarget.hasBasicF()) -- setOperationAction(ISD::FRINT, MVT::f32, Legal); -- if (Subtarget.hasBasicD()) -- setOperationAction(ISD::FRINT, MVT::f64, Legal); -- } -+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom); - -- // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and -- // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16 -- // and i32 could still be byte-swapped relatively cheaply. -- setOperationAction(ISD::BSWAP, MVT::i16, Custom); -- if (Subtarget.is64Bit()) { -+ setOperationAction(ISD::BITREVERSE, MVT::i32, Custom); - setOperationAction(ISD::BSWAP, MVT::i32, Custom); - } - -- // Expand bitreverse.i16 with native-width bitrev and shift for now, before -- // we get to know which of sll and revb.2h is faster. -- setOperationAction(ISD::BITREVERSE, MVT::i8, Custom); -- if (Subtarget.is64Bit()) { -- setOperationAction(ISD::BITREVERSE, MVT::i32, Custom); -- setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); -- } else { -- setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); -- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); -+ // Set operations for LA32 only. -+ -+ if (!Subtarget.is64Bit()) { - setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom); - setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom); -- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); - setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); -+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); -+ -+ // Set libcalls. -+ setLibcallName(RTLIB::MUL_I128, nullptr); - } - - static const ISD::CondCode FPCCToExpand[] = { - ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE, - ISD::SETGE, ISD::SETNE, ISD::SETGT}; - -+ // Set operations for 'F' feature. 
-+ - if (Subtarget.hasBasicF()) { - setCondCodeAction(FPCCToExpand, MVT::f32, Expand); -+ - setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); - setOperationAction(ISD::BR_CC, MVT::f32, Expand); - setOperationAction(ISD::FMA, MVT::f32, Legal); -@@ -173,14 +175,30 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction(ISD::FSINCOS, MVT::f32, Expand); - setOperationAction(ISD::FPOW, MVT::f32, Expand); - setOperationAction(ISD::FREM, MVT::f32, Expand); -+ -+ if (Subtarget.is64Bit()) -+ setOperationAction(ISD::FRINT, MVT::f32, Legal); -+ -+ if (!Subtarget.hasBasicD()) { -+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); -+ if (Subtarget.is64Bit()) { -+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); -+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); -+ } -+ } - } -+ -+ // Set operations for 'D' feature. -+ - if (Subtarget.hasBasicD()) { -+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); -+ setTruncStoreAction(MVT::f64, MVT::f32, Expand); - setCondCodeAction(FPCCToExpand, MVT::f64, Expand); -+ - setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); - setOperationAction(ISD::BR_CC, MVT::f64, Expand); - setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal); - setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal); -- setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); - setOperationAction(ISD::FMA, MVT::f64, Legal); - setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal); - setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal); -@@ -189,35 +207,35 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction(ISD::FSINCOS, MVT::f64, Expand); - setOperationAction(ISD::FPOW, MVT::f64, Expand); - setOperationAction(ISD::FREM, MVT::f64, Expand); -- setTruncStoreAction(MVT::f64, MVT::f32, Expand); -- } -- -- setOperationAction(ISD::BR_JT, MVT::Other, Expand); - -- setOperationAction(ISD::BR_CC, GRLenVT, Expand); -- setOperationAction(ISD::SELECT_CC, GRLenVT, Expand); -- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); -- setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand); -- if (!Subtarget.is64Bit()) -- setLibcallName(RTLIB::MUL_I128, nullptr); -- -- setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom); -- setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand); -- if ((Subtarget.is64Bit() && Subtarget.hasBasicF() && -- !Subtarget.hasBasicD())) { -- setOperationAction(ISD::SINT_TO_FP, GRLenVT, Custom); -- setOperationAction(ISD::UINT_TO_FP, GRLenVT, Custom); -+ if (Subtarget.is64Bit()) -+ setOperationAction(ISD::FRINT, MVT::f64, Legal); - } - -+ // Set operations for 'LSX' feature. -+ - if (Subtarget.hasExtLSX()) - setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, - {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8}, Legal); - -+ // Set operations for 'LASX' feature. -+ - if (Subtarget.hasExtLASX()) - setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, - {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}, - Legal); - -+ // Set DAG combine for LA32 and LA64. -+ -+ setTargetDAGCombine(ISD::AND); -+ setTargetDAGCombine(ISD::OR); -+ setTargetDAGCombine(ISD::SRL); -+ -+ // Set DAG combine for 'LSX' feature. -+ -+ if (Subtarget.hasExtLSX()) -+ setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); -+ - // Compute derived properties from the register classes. 
- computeRegisterProperties(Subtarget.getRegisterInfo()); - -@@ -235,12 +253,6 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment()); - setPrefLoopAlignment(Subtarget.getPrefLoopAlignment()); - setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment()); -- -- setTargetDAGCombine(ISD::AND); -- setTargetDAGCombine(ISD::OR); -- setTargetDAGCombine(ISD::SRL); -- if (Subtarget.hasExtLSX()) -- setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); - } - - bool LoongArchTargetLowering::isOffsetFoldingLegal( --- -2.20.1 - - -From 9b554aa98f070e4fdbf2a76cca811db411ec3312 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Tue, 29 Aug 2023 19:16:20 +0800 -Subject: [PATCH 11/35] [LoongArch] Fix typos. NFC - -(cherry picked from commit 30b6b27385f8ddc550df54a097434a121ae56d12) ---- - .../LoongArch/LoongArchLASXInstrInfo.td | 52 +++++++++---------- - .../Target/LoongArch/LoongArchLSXInstrInfo.td | 50 +++++++++--------- - 2 files changed, 51 insertions(+), 51 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index a3afd4789dfc..947950be2b8f 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -1545,10 +1545,10 @@ foreach Inst = ["XVEXTH_Q_D", "XVEXTH_QU_DU", "XVMSKLTZ_D", - // Pat<(Intrinsic timm:$imm) - // (LAInst timm:$imm)>; - def : Pat<(int_loongarch_lasx_xvldi timm:$imm), -- (XVLDI (to_valide_timm timm:$imm))>; -+ (XVLDI (to_valid_timm timm:$imm))>; - foreach Inst = ["XVREPLI_B", "XVREPLI_H", "XVREPLI_W", "XVREPLI_D"] in - def : Pat<(deriveLASXIntrinsic.ret timm:$imm), -- (!cast("Pseudo"#Inst) (to_valide_timm timm:$imm))>; -+ (!cast("Pseudo"#Inst) (to_valid_timm timm:$imm))>; - - // vty: v32i8/v16i16/v8i32/v4i64 - // Pat<(Intrinsic vty:$xj, timm:$imm) -@@ -1558,25 +1558,25 @@ foreach Inst = ["XVSAT_B", "XVSAT_BU", "XVNORI_B", "XVROTRI_B", "XVSLLWIL_H_B", - "XVSEQI_B", "XVSLEI_B", "XVSLEI_BU", "XVSLTI_B", "XVSLTI_BU", - "XVREPL128VEI_B", "XVBSLL_V", "XVBSRL_V", "XVSHUF4I_B"] in - def : Pat<(deriveLASXIntrinsic.ret (v32i8 LASX256:$xj), timm:$imm), -- (!cast(Inst) LASX256:$xj, (to_valide_timm timm:$imm))>; -+ (!cast(Inst) LASX256:$xj, (to_valid_timm timm:$imm))>; - foreach Inst = ["XVSAT_H", "XVSAT_HU", "XVROTRI_H", "XVSLLWIL_W_H", - "XVSLLWIL_WU_HU", "XVSRLRI_H", "XVSRARI_H", - "XVSEQI_H", "XVSLEI_H", "XVSLEI_HU", "XVSLTI_H", "XVSLTI_HU", - "XVREPL128VEI_H", "XVSHUF4I_H"] in - def : Pat<(deriveLASXIntrinsic.ret (v16i16 LASX256:$xj), timm:$imm), -- (!cast(Inst) LASX256:$xj, (to_valide_timm timm:$imm))>; -+ (!cast(Inst) LASX256:$xj, (to_valid_timm timm:$imm))>; - foreach Inst = ["XVSAT_W", "XVSAT_WU", "XVROTRI_W", "XVSLLWIL_D_W", - "XVSLLWIL_DU_WU", "XVSRLRI_W", "XVSRARI_W", - "XVSEQI_W", "XVSLEI_W", "XVSLEI_WU", "XVSLTI_W", "XVSLTI_WU", - "XVREPL128VEI_W", "XVSHUF4I_W", "XVPICKVE_W"] in - def : Pat<(deriveLASXIntrinsic.ret (v8i32 LASX256:$xj), timm:$imm), -- (!cast(Inst) LASX256:$xj, (to_valide_timm timm:$imm))>; -+ (!cast(Inst) LASX256:$xj, (to_valid_timm timm:$imm))>; - foreach Inst = ["XVSAT_D", "XVSAT_DU", "XVROTRI_D", "XVSRLRI_D", "XVSRARI_D", - "XVSEQI_D", "XVSLEI_D", "XVSLEI_DU", "XVSLTI_D", "XVSLTI_DU", - "XVPICKVE2GR_D", "XVPICKVE2GR_DU", - "XVREPL128VEI_D", "XVPERMI_D", "XVPICKVE_D"] in - def : Pat<(deriveLASXIntrinsic.ret (v4i64 LASX256:$xj), timm:$imm), -- (!cast(Inst) LASX256:$xj, (to_valide_timm timm:$imm))>; -+ (!cast(Inst) LASX256:$xj, (to_valid_timm 
timm:$imm))>; - - // vty: v32i8/v16i16/v8i32/v4i64 - // Pat<(Intrinsic vty:$xd, vty:$xj, timm:$imm) -@@ -1588,7 +1588,7 @@ foreach Inst = ["XVSRLNI_B_H", "XVSRANI_B_H", "XVSRLRNI_B_H", "XVSRARNI_B_H", - def : Pat<(deriveLASXIntrinsic.ret - (v32i8 LASX256:$xd), (v32i8 LASX256:$xj), timm:$imm), - (!cast(Inst) LASX256:$xd, LASX256:$xj, -- (to_valide_timm timm:$imm))>; -+ (to_valid_timm timm:$imm))>; - foreach Inst = ["XVSRLNI_H_W", "XVSRANI_H_W", "XVSRLRNI_H_W", "XVSRARNI_H_W", - "XVSSRLNI_H_W", "XVSSRANI_H_W", "XVSSRLNI_HU_W", "XVSSRANI_HU_W", - "XVSSRLRNI_H_W", "XVSSRARNI_H_W", "XVSSRLRNI_HU_W", "XVSSRARNI_HU_W", -@@ -1596,7 +1596,7 @@ foreach Inst = ["XVSRLNI_H_W", "XVSRANI_H_W", "XVSRLRNI_H_W", "XVSRARNI_H_W", - def : Pat<(deriveLASXIntrinsic.ret - (v16i16 LASX256:$xd), (v16i16 LASX256:$xj), timm:$imm), - (!cast(Inst) LASX256:$xd, LASX256:$xj, -- (to_valide_timm timm:$imm))>; -+ (to_valid_timm timm:$imm))>; - foreach Inst = ["XVSRLNI_W_D", "XVSRANI_W_D", "XVSRLRNI_W_D", "XVSRARNI_W_D", - "XVSSRLNI_W_D", "XVSSRANI_W_D", "XVSSRLNI_WU_D", "XVSSRANI_WU_D", - "XVSSRLRNI_W_D", "XVSSRARNI_W_D", "XVSSRLRNI_WU_D", "XVSSRARNI_WU_D", -@@ -1604,7 +1604,7 @@ foreach Inst = ["XVSRLNI_W_D", "XVSRANI_W_D", "XVSRLRNI_W_D", "XVSRARNI_W_D", - def : Pat<(deriveLASXIntrinsic.ret - (v8i32 LASX256:$xd), (v8i32 LASX256:$xj), timm:$imm), - (!cast(Inst) LASX256:$xd, LASX256:$xj, -- (to_valide_timm timm:$imm))>; -+ (to_valid_timm timm:$imm))>; - foreach Inst = ["XVSRLNI_D_Q", "XVSRANI_D_Q", "XVSRLRNI_D_Q", "XVSRARNI_D_Q", - "XVSSRLNI_D_Q", "XVSSRANI_D_Q", "XVSSRLNI_DU_Q", "XVSSRANI_DU_Q", - "XVSSRLRNI_D_Q", "XVSSRARNI_D_Q", "XVSSRLRNI_DU_Q", "XVSSRARNI_DU_Q", -@@ -1612,7 +1612,7 @@ foreach Inst = ["XVSRLNI_D_Q", "XVSRANI_D_Q", "XVSRLRNI_D_Q", "XVSRARNI_D_Q", - def : Pat<(deriveLASXIntrinsic.ret - (v4i64 LASX256:$xd), (v4i64 LASX256:$xj), timm:$imm), - (!cast(Inst) LASX256:$xd, LASX256:$xj, -- (to_valide_timm timm:$imm))>; -+ (to_valid_timm timm:$imm))>; - - // vty: v32i8/v16i16/v8i32/v4i64 - // Pat<(Intrinsic vty:$xd, vty:$xj, vty:$xk), -@@ -1693,42 +1693,42 @@ foreach Inst = ["XVFLOGB_D", "XVFCLASS_D", "XVFSQRT_D", "XVFRECIP_D", "XVFRSQRT_ - (!cast(Inst) LASX256:$xj)>; - - def : Pat<(int_loongarch_lasx_xvpickve_w_f v8f32:$xj, timm:$imm), -- (XVPICKVE_W v8f32:$xj, (to_valide_timm timm:$imm))>; -+ (XVPICKVE_W v8f32:$xj, (to_valid_timm timm:$imm))>; - def : Pat<(int_loongarch_lasx_xvpickve_d_f v4f64:$xj, timm:$imm), -- (XVPICKVE_D v4f64:$xj, (to_valide_timm timm:$imm))>; -+ (XVPICKVE_D v4f64:$xj, (to_valid_timm timm:$imm))>; - - // load - def : Pat<(int_loongarch_lasx_xvld GPR:$rj, timm:$imm), -- (XVLD GPR:$rj, (to_valide_timm timm:$imm))>; -+ (XVLD GPR:$rj, (to_valid_timm timm:$imm))>; - def : Pat<(int_loongarch_lasx_xvldx GPR:$rj, GPR:$rk), - (XVLDX GPR:$rj, GPR:$rk)>; - - def : Pat<(int_loongarch_lasx_xvldrepl_b GPR:$rj, timm:$imm), -- (XVLDREPL_B GPR:$rj, (to_valide_timm timm:$imm))>; -+ (XVLDREPL_B GPR:$rj, (to_valid_timm timm:$imm))>; - def : Pat<(int_loongarch_lasx_xvldrepl_h GPR:$rj, timm:$imm), -- (XVLDREPL_H GPR:$rj, (to_valide_timm timm:$imm))>; -+ (XVLDREPL_H GPR:$rj, (to_valid_timm timm:$imm))>; - def : Pat<(int_loongarch_lasx_xvldrepl_w GPR:$rj, timm:$imm), -- (XVLDREPL_W GPR:$rj, (to_valide_timm timm:$imm))>; -+ (XVLDREPL_W GPR:$rj, (to_valid_timm timm:$imm))>; - def : Pat<(int_loongarch_lasx_xvldrepl_d GPR:$rj, timm:$imm), -- (XVLDREPL_D GPR:$rj, (to_valide_timm timm:$imm))>; -+ (XVLDREPL_D GPR:$rj, (to_valid_timm timm:$imm))>; - - // store - def : Pat<(int_loongarch_lasx_xvst LASX256:$xd, GPR:$rj, 
timm:$imm), -- (XVST LASX256:$xd, GPR:$rj, (to_valide_timm timm:$imm))>; -+ (XVST LASX256:$xd, GPR:$rj, (to_valid_timm timm:$imm))>; - def : Pat<(int_loongarch_lasx_xvstx LASX256:$xd, GPR:$rj, GPR:$rk), - (XVSTX LASX256:$xd, GPR:$rj, GPR:$rk)>; - - def : Pat<(int_loongarch_lasx_xvstelm_b v32i8:$xd, GPR:$rj, timm:$imm, timm:$idx), -- (XVSTELM_B v32i8:$xd, GPR:$rj, (to_valide_timm timm:$imm), -- (to_valide_timm timm:$idx))>; -+ (XVSTELM_B v32i8:$xd, GPR:$rj, (to_valid_timm timm:$imm), -+ (to_valid_timm timm:$idx))>; - def : Pat<(int_loongarch_lasx_xvstelm_h v16i16:$xd, GPR:$rj, timm:$imm, timm:$idx), -- (XVSTELM_H v16i16:$xd, GPR:$rj, (to_valide_timm timm:$imm), -- (to_valide_timm timm:$idx))>; -+ (XVSTELM_H v16i16:$xd, GPR:$rj, (to_valid_timm timm:$imm), -+ (to_valid_timm timm:$idx))>; - def : Pat<(int_loongarch_lasx_xvstelm_w v8i32:$xd, GPR:$rj, timm:$imm, timm:$idx), -- (XVSTELM_W v8i32:$xd, GPR:$rj, (to_valide_timm timm:$imm), -- (to_valide_timm timm:$idx))>; -+ (XVSTELM_W v8i32:$xd, GPR:$rj, (to_valid_timm timm:$imm), -+ (to_valid_timm timm:$idx))>; - def : Pat<(int_loongarch_lasx_xvstelm_d v4i64:$xd, GPR:$rj, timm:$imm, timm:$idx), -- (XVSTELM_D v4i64:$xd, GPR:$rj, (to_valide_timm timm:$imm), -- (to_valide_timm timm:$idx))>; -+ (XVSTELM_D v4i64:$xd, GPR:$rj, (to_valid_timm timm:$imm), -+ (to_valid_timm timm:$idx))>; - - } // Predicates = [HasExtLASX] -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index 13332be0bc38..e021adcecf4d 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -145,7 +145,7 @@ def lsxsplati32 : PatFrag<(ops node:$e0), - def lsxsplati64 : PatFrag<(ops node:$e0), - (v2i64 (build_vector node:$e0, node:$e0))>; - --def to_valide_timm : SDNodeXForm(N); - return CurDAG->getTargetConstant(CN->getSExtValue(), SDLoc(N), Subtarget->getGRLenVT()); - }]>; -@@ -1639,10 +1639,10 @@ foreach Inst = ["VEXTH_Q_D", "VEXTH_QU_DU", "VMSKLTZ_D", - // Pat<(Intrinsic timm:$imm) - // (LAInst timm:$imm)>; - def : Pat<(int_loongarch_lsx_vldi timm:$imm), -- (VLDI (to_valide_timm timm:$imm))>; -+ (VLDI (to_valid_timm timm:$imm))>; - foreach Inst = ["VREPLI_B", "VREPLI_H", "VREPLI_W", "VREPLI_D"] in - def : Pat<(deriveLSXIntrinsic.ret timm:$imm), -- (!cast("Pseudo"#Inst) (to_valide_timm timm:$imm))>; -+ (!cast("Pseudo"#Inst) (to_valid_timm timm:$imm))>; - - // vty: v16i8/v8i16/v4i32/v2i64 - // Pat<(Intrinsic vty:$vj, timm:$imm) -@@ -1652,25 +1652,25 @@ foreach Inst = ["VSAT_B", "VSAT_BU", "VNORI_B", "VROTRI_B", "VSLLWIL_H_B", - "VSEQI_B", "VSLEI_B", "VSLEI_BU", "VSLTI_B", "VSLTI_BU", - "VREPLVEI_B", "VBSLL_V", "VBSRL_V", "VSHUF4I_B"] in - def : Pat<(deriveLSXIntrinsic.ret (v16i8 LSX128:$vj), timm:$imm), -- (!cast(Inst) LSX128:$vj, (to_valide_timm timm:$imm))>; -+ (!cast(Inst) LSX128:$vj, (to_valid_timm timm:$imm))>; - foreach Inst = ["VSAT_H", "VSAT_HU", "VROTRI_H", "VSLLWIL_W_H", - "VSLLWIL_WU_HU", "VSRLRI_H", "VSRARI_H", - "VSEQI_H", "VSLEI_H", "VSLEI_HU", "VSLTI_H", "VSLTI_HU", - "VREPLVEI_H", "VSHUF4I_H"] in - def : Pat<(deriveLSXIntrinsic.ret (v8i16 LSX128:$vj), timm:$imm), -- (!cast(Inst) LSX128:$vj, (to_valide_timm timm:$imm))>; -+ (!cast(Inst) LSX128:$vj, (to_valid_timm timm:$imm))>; - foreach Inst = ["VSAT_W", "VSAT_WU", "VROTRI_W", "VSLLWIL_D_W", - "VSLLWIL_DU_WU", "VSRLRI_W", "VSRARI_W", - "VSEQI_W", "VSLEI_W", "VSLEI_WU", "VSLTI_W", "VSLTI_WU", - "VREPLVEI_W", "VSHUF4I_W"] in - def : Pat<(deriveLSXIntrinsic.ret (v4i32 LSX128:$vj), 
timm:$imm), -- (!cast(Inst) LSX128:$vj, (to_valide_timm timm:$imm))>; -+ (!cast(Inst) LSX128:$vj, (to_valid_timm timm:$imm))>; - foreach Inst = ["VSAT_D", "VSAT_DU", "VROTRI_D", "VSRLRI_D", "VSRARI_D", - "VSEQI_D", "VSLEI_D", "VSLEI_DU", "VSLTI_D", "VSLTI_DU", - "VPICKVE2GR_D", "VPICKVE2GR_DU", - "VREPLVEI_D"] in - def : Pat<(deriveLSXIntrinsic.ret (v2i64 LSX128:$vj), timm:$imm), -- (!cast(Inst) LSX128:$vj, (to_valide_timm timm:$imm))>; -+ (!cast(Inst) LSX128:$vj, (to_valid_timm timm:$imm))>; - - // vty: v16i8/v8i16/v4i32/v2i64 - // Pat<(Intrinsic vty:$vd, vty:$vj, timm:$imm) -@@ -1682,7 +1682,7 @@ foreach Inst = ["VSRLNI_B_H", "VSRANI_B_H", "VSRLRNI_B_H", "VSRARNI_B_H", - def : Pat<(deriveLSXIntrinsic.ret - (v16i8 LSX128:$vd), (v16i8 LSX128:$vj), timm:$imm), - (!cast(Inst) LSX128:$vd, LSX128:$vj, -- (to_valide_timm timm:$imm))>; -+ (to_valid_timm timm:$imm))>; - foreach Inst = ["VSRLNI_H_W", "VSRANI_H_W", "VSRLRNI_H_W", "VSRARNI_H_W", - "VSSRLNI_H_W", "VSSRANI_H_W", "VSSRLNI_HU_W", "VSSRANI_HU_W", - "VSSRLRNI_H_W", "VSSRARNI_H_W", "VSSRLRNI_HU_W", "VSSRARNI_HU_W", -@@ -1690,7 +1690,7 @@ foreach Inst = ["VSRLNI_H_W", "VSRANI_H_W", "VSRLRNI_H_W", "VSRARNI_H_W", - def : Pat<(deriveLSXIntrinsic.ret - (v8i16 LSX128:$vd), (v8i16 LSX128:$vj), timm:$imm), - (!cast(Inst) LSX128:$vd, LSX128:$vj, -- (to_valide_timm timm:$imm))>; -+ (to_valid_timm timm:$imm))>; - foreach Inst = ["VSRLNI_W_D", "VSRANI_W_D", "VSRLRNI_W_D", "VSRARNI_W_D", - "VSSRLNI_W_D", "VSSRANI_W_D", "VSSRLNI_WU_D", "VSSRANI_WU_D", - "VSSRLRNI_W_D", "VSSRARNI_W_D", "VSSRLRNI_WU_D", "VSSRARNI_WU_D", -@@ -1698,7 +1698,7 @@ foreach Inst = ["VSRLNI_W_D", "VSRANI_W_D", "VSRLRNI_W_D", "VSRARNI_W_D", - def : Pat<(deriveLSXIntrinsic.ret - (v4i32 LSX128:$vd), (v4i32 LSX128:$vj), timm:$imm), - (!cast(Inst) LSX128:$vd, LSX128:$vj, -- (to_valide_timm timm:$imm))>; -+ (to_valid_timm timm:$imm))>; - foreach Inst = ["VSRLNI_D_Q", "VSRANI_D_Q", "VSRLRNI_D_Q", "VSRARNI_D_Q", - "VSSRLNI_D_Q", "VSSRANI_D_Q", "VSSRLNI_DU_Q", "VSSRANI_DU_Q", - "VSSRLRNI_D_Q", "VSSRARNI_D_Q", "VSSRLRNI_DU_Q", "VSSRARNI_DU_Q", -@@ -1706,7 +1706,7 @@ foreach Inst = ["VSRLNI_D_Q", "VSRANI_D_Q", "VSRLRNI_D_Q", "VSRARNI_D_Q", - def : Pat<(deriveLSXIntrinsic.ret - (v2i64 LSX128:$vd), (v2i64 LSX128:$vj), timm:$imm), - (!cast(Inst) LSX128:$vd, LSX128:$vj, -- (to_valide_timm timm:$imm))>; -+ (to_valid_timm timm:$imm))>; - - // vty: v16i8/v8i16/v4i32/v2i64 - // Pat<(Intrinsic vty:$vd, vty:$vj, vty:$vk), -@@ -1788,36 +1788,36 @@ foreach Inst = ["VFLOGB_D", "VFCLASS_D", "VFSQRT_D", "VFRECIP_D", "VFRSQRT_D", - - // load - def : Pat<(int_loongarch_lsx_vld GPR:$rj, timm:$imm), -- (VLD GPR:$rj, (to_valide_timm timm:$imm))>; -+ (VLD GPR:$rj, (to_valid_timm timm:$imm))>; - def : Pat<(int_loongarch_lsx_vldx GPR:$rj, GPR:$rk), - (VLDX GPR:$rj, GPR:$rk)>; - - def : Pat<(int_loongarch_lsx_vldrepl_b GPR:$rj, timm:$imm), -- (VLDREPL_B GPR:$rj, (to_valide_timm timm:$imm))>; -+ (VLDREPL_B GPR:$rj, (to_valid_timm timm:$imm))>; - def : Pat<(int_loongarch_lsx_vldrepl_h GPR:$rj, timm:$imm), -- (VLDREPL_H GPR:$rj, (to_valide_timm timm:$imm))>; -+ (VLDREPL_H GPR:$rj, (to_valid_timm timm:$imm))>; - def : Pat<(int_loongarch_lsx_vldrepl_w GPR:$rj, timm:$imm), -- (VLDREPL_W GPR:$rj, (to_valide_timm timm:$imm))>; -+ (VLDREPL_W GPR:$rj, (to_valid_timm timm:$imm))>; - def : Pat<(int_loongarch_lsx_vldrepl_d GPR:$rj, timm:$imm), -- (VLDREPL_D GPR:$rj, (to_valide_timm timm:$imm))>; -+ (VLDREPL_D GPR:$rj, (to_valid_timm timm:$imm))>; - - // store - def : Pat<(int_loongarch_lsx_vst LSX128:$vd, GPR:$rj, timm:$imm), 
-- (VST LSX128:$vd, GPR:$rj, (to_valide_timm timm:$imm))>; -+ (VST LSX128:$vd, GPR:$rj, (to_valid_timm timm:$imm))>; - def : Pat<(int_loongarch_lsx_vstx LSX128:$vd, GPR:$rj, GPR:$rk), - (VSTX LSX128:$vd, GPR:$rj, GPR:$rk)>; - - def : Pat<(int_loongarch_lsx_vstelm_b v16i8:$vd, GPR:$rj, timm:$imm, timm:$idx), -- (VSTELM_B v16i8:$vd, GPR:$rj, (to_valide_timm timm:$imm), -- (to_valide_timm timm:$idx))>; -+ (VSTELM_B v16i8:$vd, GPR:$rj, (to_valid_timm timm:$imm), -+ (to_valid_timm timm:$idx))>; - def : Pat<(int_loongarch_lsx_vstelm_h v8i16:$vd, GPR:$rj, timm:$imm, timm:$idx), -- (VSTELM_H v8i16:$vd, GPR:$rj, (to_valide_timm timm:$imm), -- (to_valide_timm timm:$idx))>; -+ (VSTELM_H v8i16:$vd, GPR:$rj, (to_valid_timm timm:$imm), -+ (to_valid_timm timm:$idx))>; - def : Pat<(int_loongarch_lsx_vstelm_w v4i32:$vd, GPR:$rj, timm:$imm, timm:$idx), -- (VSTELM_W v4i32:$vd, GPR:$rj, (to_valide_timm timm:$imm), -- (to_valide_timm timm:$idx))>; -+ (VSTELM_W v4i32:$vd, GPR:$rj, (to_valid_timm timm:$imm), -+ (to_valid_timm timm:$idx))>; - def : Pat<(int_loongarch_lsx_vstelm_d v2i64:$vd, GPR:$rj, timm:$imm, timm:$idx), -- (VSTELM_D v2i64:$vd, GPR:$rj, (to_valide_timm timm:$imm), -- (to_valide_timm timm:$idx))>; -+ (VSTELM_D v2i64:$vd, GPR:$rj, (to_valid_timm timm:$imm), -+ (to_valid_timm timm:$idx))>; - - } // Predicates = [HasExtLSX] --- -2.20.1 - - -From 14892c2a03810b1e01aa62e8a5f12e4f4272bf23 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Tue, 24 Oct 2023 15:46:56 +0800 -Subject: [PATCH 12/35] [LoongArch] Set some operations action for LSX and LASX - -First, expand all truncationg stores and extending loads. Second, -expand everything for `fixedlen_vector_valuetypes`. Finally, we -selectively turn on ones that can be effectively codegen'd. - -Simultaneously, this patch adds floating-point vector types to -load/store patterns. Additional test cases will be included in the IR -instruction test patchs. - -(cherry picked from commit f2441a06c609cedbb7e11303907f07bf0ca5cb2f) ---- - .../LoongArch/LoongArchISelLowering.cpp | 74 +++++++++++++++++-- - .../LoongArch/LoongArchLASXInstrInfo.td | 2 +- - .../Target/LoongArch/LoongArchLSXInstrInfo.td | 2 +- - 3 files changed, 69 insertions(+), 9 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index 2f8ce57d3f5f..d3627cec2e8c 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -214,16 +214,76 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - - // Set operations for 'LSX' feature. - -- if (Subtarget.hasExtLSX()) -- setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, -- {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8}, Legal); -+ if (Subtarget.hasExtLSX()) { -+ for (MVT VT : MVT::fixedlen_vector_valuetypes()) { -+ // Expand all truncating stores and extending loads. -+ for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { -+ setTruncStoreAction(VT, InnerVT, Expand); -+ setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); -+ setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); -+ setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); -+ } -+ // By default everything must be expanded. Then we will selectively turn -+ // on ones that can be effectively codegen'd. 
-+ for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) -+ setOperationAction(Op, VT, Expand); -+ } -+ -+ for (MVT VT : LSXVTs) { -+ setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal); -+ setOperationAction(ISD::BITCAST, VT, Legal); -+ setOperationAction(ISD::UNDEF, VT, Legal); -+ -+ // FIXME: For BUILD_VECTOR, it is temporarily set to `Legal` here, and it -+ // will be `Custom` handled in the future. -+ setOperationAction(ISD::BUILD_VECTOR, VT, Legal); -+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); -+ } -+ for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { -+ setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); -+ setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT, -+ Legal); -+ setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM}, -+ VT, Legal); -+ setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal); -+ setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); -+ setOperationAction(ISD::CTPOP, VT, Legal); -+ } -+ for (MVT VT : {MVT::v4f32, MVT::v2f64}) { -+ setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); -+ setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); -+ setOperationAction(ISD::FMA, VT, Legal); -+ } -+ } - - // Set operations for 'LASX' feature. - -- if (Subtarget.hasExtLASX()) -- setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, -- {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}, -- Legal); -+ if (Subtarget.hasExtLASX()) { -+ for (MVT VT : LASXVTs) { -+ setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal); -+ setOperationAction(ISD::BITCAST, VT, Legal); -+ setOperationAction(ISD::UNDEF, VT, Legal); -+ -+ // FIXME: Same as above. -+ setOperationAction(ISD::BUILD_VECTOR, VT, Legal); -+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); -+ } -+ for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) { -+ setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); -+ setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT, -+ Legal); -+ setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM}, -+ VT, Legal); -+ setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal); -+ setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); -+ setOperationAction(ISD::CTPOP, VT, Legal); -+ } -+ for (MVT VT : {MVT::v8f32, MVT::v4f64}) { -+ setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); -+ setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); -+ setOperationAction(ISD::FMA, VT, Legal); -+ } -+ } - - // Set DAG combine for LA32 and LA64. 
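The strategy implemented in the hunk above (default every fixed-length vector operation to Expand, then opt individual operations back in as Legal once the ISA actually covers them) is the usual way a backend brings up a new vector extension. Below is a minimal illustrative sketch of that pattern, not the actual LoongArch code: it assumes only the generic llvm::TargetLowering calls already used in the hunk and would sit inside a TargetLowering subclass constructor.

    // Default: assume no vector operation is natively supported.
    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
      // Truncating stores and extending loads between any two vector types
      // are broken up by the legalizer instead of being matched directly.
      for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
      // Every generic ISD opcode also starts out as Expand.
      for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
        setOperationAction(Op, VT, Expand);
    }
    // Then selectively mark as Legal only what the vector ISA provides,
    // e.g. basic integer arithmetic on the 128-bit LSX types.
    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
      setOperationAction({ISD::ADD, ISD::SUB, ISD::MUL}, VT, Legal);

Anything left at Expand is scalarized or decomposed by type/operation legalization, so missing entries degrade code quality rather than correctness; later patches in this series then flip more operations (extractelement, insertelement, compares, FP ops) from Expand to Legal or Custom as patterns are added.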
- -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index 947950be2b8f..e19aa92266b1 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -1394,7 +1394,7 @@ def : Pat<(loongarch_vreplve v4i64:$xj, GRLenVT:$rk), - (XVREPLVE_D v4i64:$xj, GRLenVT:$rk)>; - - // Loads/Stores --foreach vt = [v32i8, v16i16, v8i32, v4i64] in { -+foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in { - defm : LdPat; - def : RegRegLdPat; - defm : StPat; -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index e021adcecf4d..9391b1a8a20c 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -1494,7 +1494,7 @@ def : Pat<(loongarch_vreplve v2i64:$vj, GRLenVT:$rk), - (VREPLVE_D v2i64:$vj, GRLenVT:$rk)>; - - // Loads/Stores --foreach vt = [v16i8, v8i16, v4i32, v2i64] in { -+foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { - defm : LdPat; - def : RegRegLdPat; - defm : StPat; --- -2.20.1 - - -From 85d34e0b7e9947dda7ea981aa1dc10714fd44de5 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Thu, 30 Nov 2023 17:29:18 +0800 -Subject: [PATCH 13/35] [LoongArch] Add codegen support for extractelement - (#73759) - -Add codegen support for extractelement when enable `lsx` or `lasx` -feature. - -(cherry picked from commit b72456120f1db38ed7068fb592fcf768c6d5cce2) ---- - .../LoongArch/LoongArchISelLowering.cpp | 2 + - .../Target/LoongArch/LoongArchInstrInfo.cpp | 8 + - .../LoongArch/LoongArchLASXInstrInfo.td | 38 ++++ - .../Target/LoongArch/LoongArchLSXInstrInfo.td | 38 ++++ - .../lasx/ir-instruction/extractelement.ll | 172 ++++++++++++++++++ - .../lsx/ir-instruction/extractelement.ll | 170 +++++++++++++++++ - 6 files changed, 428 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index d3627cec2e8c..26e94a53b344 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -238,6 +238,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - // will be `Custom` handled in the future. - setOperationAction(ISD::BUILD_VECTOR, VT, Legal); - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); -+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); - } - for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { - setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); -@@ -267,6 +268,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - // FIXME: Same as above. 
- setOperationAction(ISD::BUILD_VECTOR, VT, Legal); - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); -+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); - } - for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) { - setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); -diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp -index ddd1c9943fac..6576100d3b32 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp -@@ -90,6 +90,14 @@ void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB, - Opc = LoongArch::FMOV_S; - } else if (LoongArch::FPR64RegClass.contains(DstReg, SrcReg)) { - Opc = LoongArch::FMOV_D; -+ } else if (LoongArch::GPRRegClass.contains(DstReg) && -+ LoongArch::FPR32RegClass.contains(SrcReg)) { -+ // FPR32 -> GPR copies -+ Opc = LoongArch::MOVFR2GR_S; -+ } else if (LoongArch::GPRRegClass.contains(DstReg) && -+ LoongArch::FPR64RegClass.contains(SrcReg)) { -+ // FPR64 -> GPR copies -+ Opc = LoongArch::MOVFR2GR_D; - } else { - // TODO: support other copies. - llvm_unreachable("Impossible reg-to-reg copy"); -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index e19aa92266b1..380206ddcf10 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -1401,6 +1401,44 @@ foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in { - def : RegRegStPat; - } - -+// Vector extraction with constant index. -+def : Pat<(i64 (vector_extract v32i8:$xj, uimm4:$imm)), -+ (VPICKVE2GR_B (EXTRACT_SUBREG v32i8:$xj, sub_128), uimm4:$imm)>; -+def : Pat<(i64 (vector_extract v16i16:$xj, uimm3:$imm)), -+ (VPICKVE2GR_H (EXTRACT_SUBREG v16i16:$xj, sub_128), uimm3:$imm)>; -+def : Pat<(i64 (vector_extract v8i32:$xj, uimm2:$imm)), -+ (VPICKVE2GR_W (EXTRACT_SUBREG v8i32:$xj, sub_128), uimm2:$imm)>; -+def : Pat<(i64 (vector_extract v4i64:$xj, uimm1:$imm)), -+ (VPICKVE2GR_D (EXTRACT_SUBREG v4i64:$xj, sub_128), uimm1:$imm)>; -+def : Pat<(f32 (vector_extract v8f32:$xj, uimm2:$imm)), -+ (f32 (EXTRACT_SUBREG (XVREPL128VEI_W v8f32:$xj, uimm2:$imm), sub_32))>; -+def : Pat<(f64 (vector_extract v4f64:$xj, uimm1:$imm)), -+ (f64 (EXTRACT_SUBREG (XVREPL128VEI_D v4f64:$xj, uimm1:$imm), sub_64))>; -+ -+// Vector extraction with variable index. 
-+def : Pat<(i64 (vector_extract v32i8:$xj, i64:$rk)), -+ (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_B v32i8:$xj, -+ i64:$rk), -+ sub_32)), -+ GPR), (i64 24))>; -+def : Pat<(i64 (vector_extract v16i16:$xj, i64:$rk)), -+ (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_H v16i16:$xj, -+ i64:$rk), -+ sub_32)), -+ GPR), (i64 16))>; -+def : Pat<(i64 (vector_extract v8i32:$xj, i64:$rk)), -+ (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_W v8i32:$xj, i64:$rk), -+ sub_32)), -+ GPR)>; -+def : Pat<(i64 (vector_extract v4i64:$xj, i64:$rk)), -+ (COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (XVREPLVE_D v4i64:$xj, i64:$rk), -+ sub_64)), -+ GPR)>; -+def : Pat<(f32 (vector_extract v8f32:$xj, i64:$rk)), -+ (f32 (EXTRACT_SUBREG (XVREPLVE_W v8f32:$xj, i64:$rk), sub_32))>; -+def : Pat<(f64 (vector_extract v4f64:$xj, i64:$rk)), -+ (f64 (EXTRACT_SUBREG (XVREPLVE_D v4f64:$xj, i64:$rk), sub_64))>; -+ - } // Predicates = [HasExtLASX] - - /// Intrinsic pattern -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index 9391b1a8a20c..980870e34503 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -1501,6 +1501,44 @@ foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { - def : RegRegStPat; - } - -+// Vector extraction with constant index. -+def : Pat<(i64 (vector_extract v16i8:$vj, uimm4:$imm)), -+ (VPICKVE2GR_B v16i8:$vj, uimm4:$imm)>; -+def : Pat<(i64 (vector_extract v8i16:$vj, uimm3:$imm)), -+ (VPICKVE2GR_H v8i16:$vj, uimm3:$imm)>; -+def : Pat<(i64 (vector_extract v4i32:$vj, uimm2:$imm)), -+ (VPICKVE2GR_W v4i32:$vj, uimm2:$imm)>; -+def : Pat<(i64 (vector_extract v2i64:$vj, uimm1:$imm)), -+ (VPICKVE2GR_D v2i64:$vj, uimm1:$imm)>; -+def : Pat<(f32 (vector_extract v4f32:$vj, uimm2:$imm)), -+ (f32 (EXTRACT_SUBREG (VREPLVEI_W v4f32:$vj, uimm2:$imm), sub_32))>; -+def : Pat<(f64 (vector_extract v2f64:$vj, uimm1:$imm)), -+ (f64 (EXTRACT_SUBREG (VREPLVEI_D v2f64:$vj, uimm1:$imm), sub_64))>; -+ -+// Vector extraction with variable index. 
-+def : Pat<(i64 (vector_extract v16i8:$vj, i64:$rk)), -+ (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_B v16i8:$vj, -+ i64:$rk), -+ sub_32)), -+ GPR), (i64 24))>; -+def : Pat<(i64 (vector_extract v8i16:$vj, i64:$rk)), -+ (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_H v8i16:$vj, -+ i64:$rk), -+ sub_32)), -+ GPR), (i64 16))>; -+def : Pat<(i64 (vector_extract v4i32:$vj, i64:$rk)), -+ (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_W v4i32:$vj, i64:$rk), -+ sub_32)), -+ GPR)>; -+def : Pat<(i64 (vector_extract v2i64:$vj, i64:$rk)), -+ (COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (VREPLVE_D v2i64:$vj, i64:$rk), -+ sub_64)), -+ GPR)>; -+def : Pat<(f32 (vector_extract v4f32:$vj, i64:$rk)), -+ (f32 (EXTRACT_SUBREG (VREPLVE_W v4f32:$vj, i64:$rk), sub_32))>; -+def : Pat<(f64 (vector_extract v2f64:$vj, i64:$rk)), -+ (f64 (EXTRACT_SUBREG (VREPLVE_D v2f64:$vj, i64:$rk), sub_64))>; -+ - } // Predicates = [HasExtLSX] - - /// Intrinsic pattern -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll -new file mode 100644 -index 000000000000..78f584cd09a8 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll -@@ -0,0 +1,172 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @extract_32xi8(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: extract_32xi8: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1 -+; CHECK-NEXT: st.b $a0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <32 x i8>, ptr %src -+ %e = extractelement <32 x i8> %v, i32 1 -+ store i8 %e, ptr %dst -+ ret void -+} -+ -+define void @extract_16xi16(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: extract_16xi16: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1 -+; CHECK-NEXT: st.h $a0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <16 x i16>, ptr %src -+ %e = extractelement <16 x i16> %v, i32 1 -+ store i16 %e, ptr %dst -+ ret void -+} -+ -+define void @extract_8xi32(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: extract_8xi32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1 -+; CHECK-NEXT: st.w $a0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <8 x i32>, ptr %src -+ %e = extractelement <8 x i32> %v, i32 1 -+ store i32 %e, ptr %dst -+ ret void -+} -+ -+define void @extract_4xi64(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: extract_4xi64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1 -+; CHECK-NEXT: st.d $a0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <4 x i64>, ptr %src -+ %e = extractelement <4 x i64> %v, i32 1 -+ store i64 %e, ptr %dst -+ ret void -+} -+ -+define void @extract_8xfloat(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: extract_8xfloat: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: ori $a0, $zero, 7 -+; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a0 -+; CHECK-NEXT: fst.s $fa0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <8 x float>, ptr %src -+ %e = extractelement <8 x float> %v, i32 7 -+ store float %e, ptr %dst -+ ret void -+} -+ -+define void @extract_4xdouble(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: extract_4xdouble: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: ori $a0, $zero, 3 -+; CHECK-NEXT: 
xvreplve.d $xr0, $xr0, $a0 -+; CHECK-NEXT: fst.d $fa0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <4 x double>, ptr %src -+ %e = extractelement <4 x double> %v, i32 3 -+ store double %e, ptr %dst -+ ret void -+} -+ -+define void @extract_32xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -+; CHECK-LABEL: extract_32xi8_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvreplve.b $xr0, $xr0, $a2 -+; CHECK-NEXT: movfr2gr.s $a0, $fa0 -+; CHECK-NEXT: srai.w $a0, $a0, 24 -+; CHECK-NEXT: st.b $a0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <32 x i8>, ptr %src -+ %e = extractelement <32 x i8> %v, i32 %idx -+ store i8 %e, ptr %dst -+ ret void -+} -+ -+define void @extract_16xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -+; CHECK-LABEL: extract_16xi16_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvreplve.h $xr0, $xr0, $a2 -+; CHECK-NEXT: movfr2gr.s $a0, $fa0 -+; CHECK-NEXT: srai.w $a0, $a0, 16 -+; CHECK-NEXT: st.h $a0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <16 x i16>, ptr %src -+ %e = extractelement <16 x i16> %v, i32 %idx -+ store i16 %e, ptr %dst -+ ret void -+} -+ -+define void @extract_8xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -+; CHECK-LABEL: extract_8xi32_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a2 -+; CHECK-NEXT: movfr2gr.s $a0, $fa0 -+; CHECK-NEXT: st.w $a0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <8 x i32>, ptr %src -+ %e = extractelement <8 x i32> %v, i32 %idx -+ store i32 %e, ptr %dst -+ ret void -+} -+ -+define void @extract_4xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -+; CHECK-LABEL: extract_4xi64_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a2 -+; CHECK-NEXT: movfr2gr.d $a0, $fa0 -+; CHECK-NEXT: st.d $a0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <4 x i64>, ptr %src -+ %e = extractelement <4 x i64> %v, i32 %idx -+ store i64 %e, ptr %dst -+ ret void -+} -+ -+define void @extract_8xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -+; CHECK-LABEL: extract_8xfloat_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a2 -+; CHECK-NEXT: fst.s $fa0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <8 x float>, ptr %src -+ %e = extractelement <8 x float> %v, i32 %idx -+ store float %e, ptr %dst -+ ret void -+} -+ -+define void @extract_4xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -+; CHECK-LABEL: extract_4xdouble_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a2 -+; CHECK-NEXT: fst.d $fa0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <4 x double>, ptr %src -+ %e = extractelement <4 x double> %v, i32 %idx -+ store double %e, ptr %dst -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll -new file mode 100644 -index 000000000000..b8798c97861e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll -@@ -0,0 +1,170 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | 
FileCheck %s -+ -+define void @extract_16xi8(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: extract_16xi8: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1 -+; CHECK-NEXT: st.b $a0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <16 x i8>, ptr %src -+ %e = extractelement <16 x i8> %v, i32 1 -+ store i8 %e, ptr %dst -+ ret void -+} -+ -+define void @extract_8xi16(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: extract_8xi16: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1 -+; CHECK-NEXT: st.h $a0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <8 x i16>, ptr %src -+ %e = extractelement <8 x i16> %v, i32 1 -+ store i16 %e, ptr %dst -+ ret void -+} -+ -+define void @extract_4xi32(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: extract_4xi32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1 -+; CHECK-NEXT: st.w $a0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <4 x i32>, ptr %src -+ %e = extractelement <4 x i32> %v, i32 1 -+ store i32 %e, ptr %dst -+ ret void -+} -+ -+define void @extract_2xi64(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: extract_2xi64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1 -+; CHECK-NEXT: st.d $a0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <2 x i64>, ptr %src -+ %e = extractelement <2 x i64> %v, i32 1 -+ store i64 %e, ptr %dst -+ ret void -+} -+ -+define void @extract_4xfloat(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: extract_4xfloat: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vreplvei.w $vr0, $vr0, 1 -+; CHECK-NEXT: fst.s $fa0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <4 x float>, ptr %src -+ %e = extractelement <4 x float> %v, i32 1 -+ store float %e, ptr %dst -+ ret void -+} -+ -+define void @extract_2xdouble(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: extract_2xdouble: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vreplvei.d $vr0, $vr0, 1 -+; CHECK-NEXT: fst.d $fa0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <2 x double>, ptr %src -+ %e = extractelement <2 x double> %v, i32 1 -+ store double %e, ptr %dst -+ ret void -+} -+ -+define void @extract_16xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -+; CHECK-LABEL: extract_16xi8_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vreplve.b $vr0, $vr0, $a2 -+; CHECK-NEXT: movfr2gr.s $a0, $fa0 -+; CHECK-NEXT: srai.w $a0, $a0, 24 -+; CHECK-NEXT: st.b $a0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <16 x i8>, ptr %src -+ %e = extractelement <16 x i8> %v, i32 %idx -+ store i8 %e, ptr %dst -+ ret void -+} -+ -+define void @extract_8xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -+; CHECK-LABEL: extract_8xi16_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vreplve.h $vr0, $vr0, $a2 -+; CHECK-NEXT: movfr2gr.s $a0, $fa0 -+; CHECK-NEXT: srai.w $a0, $a0, 16 -+; CHECK-NEXT: st.h $a0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <8 x i16>, ptr %src -+ %e = extractelement <8 x i16> %v, i32 %idx -+ store i16 %e, ptr %dst -+ ret void -+} -+ -+define void @extract_4xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -+; CHECK-LABEL: extract_4xi32_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vreplve.w $vr0, $vr0, $a2 -+; CHECK-NEXT: movfr2gr.s $a0, 
$fa0 -+; CHECK-NEXT: st.w $a0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <4 x i32>, ptr %src -+ %e = extractelement <4 x i32> %v, i32 %idx -+ store i32 %e, ptr %dst -+ ret void -+} -+ -+define void @extract_2xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -+; CHECK-LABEL: extract_2xi64_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vreplve.d $vr0, $vr0, $a2 -+; CHECK-NEXT: movfr2gr.d $a0, $fa0 -+; CHECK-NEXT: st.d $a0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <2 x i64>, ptr %src -+ %e = extractelement <2 x i64> %v, i32 %idx -+ store i64 %e, ptr %dst -+ ret void -+} -+ -+define void @extract_4xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -+; CHECK-LABEL: extract_4xfloat_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vreplve.w $vr0, $vr0, $a2 -+; CHECK-NEXT: fst.s $fa0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <4 x float>, ptr %src -+ %e = extractelement <4 x float> %v, i32 %idx -+ store float %e, ptr %dst -+ ret void -+} -+ -+define void @extract_2xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -+; CHECK-LABEL: extract_2xdouble_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vreplve.d $vr0, $vr0, $a2 -+; CHECK-NEXT: fst.d $fa0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <2 x double>, ptr %src -+ %e = extractelement <2 x double> %v, i32 %idx -+ store double %e, ptr %dst -+ ret void -+} --- -2.20.1 - - -From eb1dc17f9111c2bf2d20d366a9b46c4bda0606f6 Mon Sep 17 00:00:00 2001 -From: leecheechen -Date: Thu, 30 Nov 2023 21:41:18 +0800 -Subject: [PATCH 14/35] [LoongArch] Add some binary IR instructions testcases - for LSX (#73929) - -The IR instructions include: -- Binary Operations: add fadd sub fsub mul fmul udiv sdiv fdiv -- Bitwise Binary Operations: shl lshr ashr - -(cherry picked from commit 29a0f3ec2b47630ce229953fe7250e741b6c10b6) ---- - .../LoongArch/lsx/ir-instruction/add.ll | 122 +++++++++ - .../LoongArch/lsx/ir-instruction/ashr.ll | 178 +++++++++++++ - .../LoongArch/lsx/ir-instruction/fadd.ll | 34 +++ - .../LoongArch/lsx/ir-instruction/fdiv.ll | 34 +++ - .../LoongArch/lsx/ir-instruction/fmul.ll | 34 +++ - .../LoongArch/lsx/ir-instruction/fsub.ll | 34 +++ - .../LoongArch/lsx/ir-instruction/lshr.ll | 178 +++++++++++++ - .../LoongArch/lsx/ir-instruction/mul.ll | 242 ++++++++++++++++++ - .../LoongArch/lsx/ir-instruction/sdiv.ll | 134 ++++++++++ - .../LoongArch/lsx/ir-instruction/shl.ll | 178 +++++++++++++ - .../LoongArch/lsx/ir-instruction/sub.ll | 122 +++++++++ - .../LoongArch/lsx/ir-instruction/udiv.ll | 122 +++++++++ - 12 files changed, 1412 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll - create mode 100644 
llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll - -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll -new file mode 100644 -index 000000000000..2a7c37c2ae34 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll -@@ -0,0 +1,122 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @add_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: add_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vadd.b $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %v2 = add <16 x i8> %v0, %v1 -+ store <16 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @add_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: add_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vadd.h $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %v2 = add <8 x i16> %v0, %v1 -+ store <8 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @add_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: add_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vadd.w $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %v2 = add <4 x i32> %v0, %v1 -+ store <4 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @add_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: add_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vadd.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %v2 = add <2 x i64> %v0, %v1 -+ store <2 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @add_v16i8_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: add_v16i8_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vaddi.bu $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = add <16 x i8> %v0, -+ store <16 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @add_v8i16_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: add_v8i16_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vaddi.hu $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = add <8 x i16> %v0, -+ store <8 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @add_v4i32_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: add_v4i32_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vaddi.wu $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = add <4 x i32> %v0, -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @add_v2i64_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: add_v2i64_31: -+; CHECK: # %bb.0: # %entry 
-+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vaddi.du $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = add <2 x i64> %v0, -+ store <2 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll -new file mode 100644 -index 000000000000..fbc570d77ba8 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll -@@ -0,0 +1,178 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @ashr_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: ashr_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsra.b $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %v2 = ashr <16 x i8> %v0, %v1 -+ store <16 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @ashr_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: ashr_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsra.h $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %v2 = ashr <8 x i16> %v0, %v1 -+ store <8 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @ashr_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: ashr_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsra.w $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %v2 = ashr <4 x i32> %v0, %v1 -+ store <4 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @ashr_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: ashr_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsra.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %v2 = ashr <2 x i64> %v0, %v1 -+ store <2 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @ashr_v16i8_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: ashr_v16i8_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrai.b $vr0, $vr0, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = ashr <16 x i8> %v0, -+ store <16 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @ashr_v16i8_7(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: ashr_v16i8_7: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrai.b $vr0, $vr0, 7 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = ashr <16 x i8> %v0, -+ store <16 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @ashr_v8i16_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: ashr_v8i16_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrai.h $vr0, $vr0, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = ashr <8 x i16> %v0, -+ store <8 x i16> %v1, ptr %res -+ ret void 
-+} -+ -+define void @ashr_v8i16_15(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: ashr_v8i16_15: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrai.h $vr0, $vr0, 15 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = ashr <8 x i16> %v0, -+ store <8 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @ashr_v4i32_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: ashr_v4i32_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrai.w $vr0, $vr0, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = ashr <4 x i32> %v0, -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @ashr_v4i32_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: ashr_v4i32_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrai.w $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = ashr <4 x i32> %v0, -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @ashr_v2i64_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: ashr_v2i64_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrai.d $vr0, $vr0, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = ashr <2 x i64> %v0, -+ store <2 x i64> %v1, ptr %res -+ ret void -+} -+ -+define void @ashr_v2i64_63(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: ashr_v2i64_63: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrai.d $vr0, $vr0, 63 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = ashr <2 x i64> %v0, -+ store <2 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll -new file mode 100644 -index 000000000000..1fa1f611c4a3 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll -@@ -0,0 +1,34 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @fadd_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: fadd_v4f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfadd.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = fadd <4 x float> %v0, %v1 -+ store <4 x float> %v2, ptr %res -+ ret void -+} -+ -+define void @fadd_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: fadd_v2f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfadd.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = fadd <2 x double> %v0, %v1 -+ store <2 x double> %v2, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll -new file mode 100644 -index 000000000000..eb7c8bd9616e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll -@@ -0,0 +1,34 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @fdiv_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: fdiv_v4f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfdiv.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = fdiv <4 x float> %v0, %v1 -+ store <4 x float> %v2, ptr %res -+ ret void -+} -+ -+define void @fdiv_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: fdiv_v2f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfdiv.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = fdiv <2 x double> %v0, %v1 -+ store <2 x double> %v2, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll -new file mode 100644 -index 000000000000..e7fb527f7805 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll -@@ -0,0 +1,34 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @fmul_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: fmul_v4f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfmul.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = fmul <4 x float> %v0, %v1 -+ store <4 x float> %v2, ptr %res -+ ret void -+} -+ -+define void @fmul_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: fmul_v2f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfmul.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = fmul <2 x double> %v0, %v1 -+ store <2 x double> %v2, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll -new file mode 100644 -index 000000000000..df98182321da ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll -@@ -0,0 +1,34 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @fsub_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: fsub_v4f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfsub.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = fsub <4 x float> %v0, %v1 -+ store <4 x float> %v2, ptr %res -+ ret void -+} -+ -+define void @fsub_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: fsub_v2f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfsub.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = 
load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = fsub <2 x double> %v0, %v1 -+ store <2 x double> %v2, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll -new file mode 100644 -index 000000000000..dada52f93060 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll -@@ -0,0 +1,178 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @lshr_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: lshr_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsrl.b $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %v2 = lshr <16 x i8> %v0, %v1 -+ store <16 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @lshr_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: lshr_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsrl.h $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %v2 = lshr <8 x i16> %v0, %v1 -+ store <8 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @lshr_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: lshr_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsrl.w $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %v2 = lshr <4 x i32> %v0, %v1 -+ store <4 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @lshr_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: lshr_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsrl.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %v2 = lshr <2 x i64> %v0, %v1 -+ store <2 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @lshr_v16i8_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: lshr_v16i8_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrli.b $vr0, $vr0, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = lshr <16 x i8> %v0, -+ store <16 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @lshr_v16i8_7(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: lshr_v16i8_7: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrli.b $vr0, $vr0, 7 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = lshr <16 x i8> %v0, -+ store <16 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @lshr_v8i16_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: lshr_v8i16_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrli.h $vr0, $vr0, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = lshr <8 x i16> %v0, -+ store <8 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @lshr_v8i16_15(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: 
lshr_v8i16_15: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrli.h $vr0, $vr0, 15 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = lshr <8 x i16> %v0, -+ store <8 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @lshr_v4i32_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: lshr_v4i32_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrli.w $vr0, $vr0, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = lshr <4 x i32> %v0, -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @lshr_v4i32_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: lshr_v4i32_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrli.w $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = lshr <4 x i32> %v0, -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @lshr_v2i64_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: lshr_v2i64_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrli.d $vr0, $vr0, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = lshr <2 x i64> %v0, -+ store <2 x i64> %v1, ptr %res -+ ret void -+} -+ -+define void @lshr_v2i64_63(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: lshr_v2i64_63: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrli.d $vr0, $vr0, 63 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = lshr <2 x i64> %v0, -+ store <2 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll -new file mode 100644 -index 000000000000..5060240cd8b1 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll -@@ -0,0 +1,242 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @mul_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mul_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vmul.b $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %v2 = mul <16 x i8> %v0, %v1 -+ store <16 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @mul_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mul_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vmul.h $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %v2 = mul <8 x i16> %v0, %v1 -+ store <8 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @mul_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mul_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vmul.w $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %v2 = mul <4 x i32> %v0, %v1 -+ store <4 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @mul_v2i64(ptr %res, 
ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mul_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vmul.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %v2 = mul <2 x i64> %v0, %v1 -+ store <2 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @mul_square_v16i8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_square_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vmul.b $vr0, $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = mul <16 x i8> %v0, %v0 -+ store <16 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_square_v8i16(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_square_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vmul.h $vr0, $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = mul <8 x i16> %v0, %v0 -+ store <8 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_square_v4i32(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_square_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vmul.w $vr0, $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = mul <4 x i32> %v0, %v0 -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_square_v2i64(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_square_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vmul.d $vr0, $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = mul <2 x i64> %v0, %v0 -+ store <2 x i64> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_v16i8_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_v16i8_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslli.b $vr0, $vr0, 3 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = mul <16 x i8> %v0, -+ store <16 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_v8i16_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_v8i16_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslli.h $vr0, $vr0, 3 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = mul <8 x i16> %v0, -+ store <8 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_v4i32_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_v4i32_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslli.w $vr0, $vr0, 3 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = mul <4 x i32> %v0, -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_v2i64_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_v2i64_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslli.d $vr0, $vr0, 3 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = mul <2 x i64> %v0, -+ store <2 x i64> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_v16i8_17(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_v16i8_17: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: ori $a2, $zero, 17 -+; CHECK-NEXT: vreplgr2vr.b $vr0, 
$a2 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vmul.b $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = mul <16 x i8> %v0, -+ store <16 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_v8i16_17(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_v8i16_17: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: ori $a2, $zero, 17 -+; CHECK-NEXT: vreplgr2vr.h $vr0, $a2 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vmul.h $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = mul <8 x i16> %v0, -+ store <8 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_v4i32_17(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_v4i32_17: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: ori $a2, $zero, 17 -+; CHECK-NEXT: vreplgr2vr.w $vr0, $a2 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vmul.w $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = mul <4 x i32> %v0, -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_v2i64_17(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_v2i64_17: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: ori $a2, $zero, 17 -+; CHECK-NEXT: vreplgr2vr.d $vr0, $a2 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vmul.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = mul <2 x i64> %v0, -+ store <2 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll -new file mode 100644 -index 000000000000..b68f73a74913 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll -@@ -0,0 +1,134 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @sdiv_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: sdiv_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vdiv.b $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %v2 = sdiv <16 x i8> %v0, %v1 -+ store <16 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @sdiv_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: sdiv_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vdiv.h $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %v2 = sdiv <8 x i16> %v0, %v1 -+ store <8 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @sdiv_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: sdiv_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vdiv.w $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %v2 = sdiv <4 x i32> %v0, %v1 -+ store <4 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @sdiv_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: sdiv_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: 
vdiv.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %v2 = sdiv <2 x i64> %v0, %v1 -+ store <2 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @sdiv_v16i8_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sdiv_v16i8_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrai.b $vr1, $vr0, 7 -+; CHECK-NEXT: vsrli.b $vr1, $vr1, 5 -+; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: vsrai.b $vr0, $vr0, 3 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = sdiv <16 x i8> %v0, -+ store <16 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @sdiv_v8i16_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sdiv_v8i16_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrai.h $vr1, $vr0, 15 -+; CHECK-NEXT: vsrli.h $vr1, $vr1, 13 -+; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: vsrai.h $vr0, $vr0, 3 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = sdiv <8 x i16> %v0, -+ store <8 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @sdiv_v4i32_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sdiv_v4i32_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrai.w $vr1, $vr0, 31 -+; CHECK-NEXT: vsrli.w $vr1, $vr1, 29 -+; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: vsrai.w $vr0, $vr0, 3 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = sdiv <4 x i32> %v0, -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @sdiv_v2i64_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sdiv_v2i64_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrai.d $vr1, $vr0, 63 -+; CHECK-NEXT: vsrli.d $vr1, $vr1, 61 -+; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: vsrai.d $vr0, $vr0, 3 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = sdiv <2 x i64> %v0, -+ store <2 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll -new file mode 100644 -index 000000000000..fa0aebaf28b3 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll -@@ -0,0 +1,178 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @shl_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: shl_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsll.b $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %v2 = shl <16 x i8> %v0, %v1 -+ store <16 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @shl_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: shl_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsll.h $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %v2 = shl <8 x i16> %v0, %v1 -+ store <8 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @shl_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { 
-+; CHECK-LABEL: shl_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsll.w $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %v2 = shl <4 x i32> %v0, %v1 -+ store <4 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @shl_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: shl_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsll.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %v2 = shl <2 x i64> %v0, %v1 -+ store <2 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @shl_v16i8_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: shl_v16i8_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslli.b $vr0, $vr0, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = shl <16 x i8> %v0, -+ store <16 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @shl_v16i8_7(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: shl_v16i8_7: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslli.b $vr0, $vr0, 7 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = shl <16 x i8> %v0, -+ store <16 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @shl_v8i16_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: shl_v8i16_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslli.h $vr0, $vr0, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = shl <8 x i16> %v0, -+ store <8 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @shl_v8i16_15(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: shl_v8i16_15: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslli.h $vr0, $vr0, 15 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = shl <8 x i16> %v0, -+ store <8 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @shl_v4i32_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: shl_v4i32_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslli.w $vr0, $vr0, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = shl <4 x i32> %v0, -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @shl_v4i32_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: shl_v4i32_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslli.w $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = shl <4 x i32> %v0, -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @shl_v2i64_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: shl_v2i64_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslli.d $vr0, $vr0, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = shl <2 x i64> %v0, -+ store <2 x i64> %v1, ptr %res -+ ret void -+} -+ -+define void @shl_v2i64_63(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: shl_v2i64_63: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslli.d $vr0, $vr0, 63 -+; 
CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = shl <2 x i64> %v0, -+ store <2 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll -new file mode 100644 -index 000000000000..25b4623a47d1 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll -@@ -0,0 +1,122 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @sub_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: sub_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsub.b $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %v2 = sub <16 x i8> %v0, %v1 -+ store <16 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @sub_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: sub_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsub.h $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %v2 = sub <8 x i16> %v0, %v1 -+ store <8 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @sub_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: sub_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsub.w $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %v2 = sub <4 x i32> %v0, %v1 -+ store <4 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @sub_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: sub_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsub.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %v2 = sub <2 x i64> %v0, %v1 -+ store <2 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @sub_v16i8_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sub_v16i8_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsubi.bu $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = sub <16 x i8> %v0, -+ store <16 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @sub_v8i16_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sub_v8i16_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsubi.hu $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = sub <8 x i16> %v0, -+ store <8 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @sub_v4i32_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sub_v4i32_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsubi.wu $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = sub <4 x i32> %v0, -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @sub_v2i64_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: 
sub_v2i64_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsubi.du $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = sub <2 x i64> %v0, -+ store <2 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll -new file mode 100644 -index 000000000000..abb60b91dd48 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll -@@ -0,0 +1,122 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @udiv_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: udiv_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vdiv.bu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %v2 = udiv <16 x i8> %v0, %v1 -+ store <16 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @udiv_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: udiv_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vdiv.hu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %v2 = udiv <8 x i16> %v0, %v1 -+ store <8 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @udiv_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: udiv_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vdiv.wu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %v2 = udiv <4 x i32> %v0, %v1 -+ store <4 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @udiv_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: udiv_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vdiv.du $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %v2 = udiv <2 x i64> %v0, %v1 -+ store <2 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @udiv_v16i8_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: udiv_v16i8_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrli.b $vr0, $vr0, 3 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = udiv <16 x i8> %v0, -+ store <16 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @udiv_v8i16_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: udiv_v8i16_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrli.h $vr0, $vr0, 3 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = udiv <8 x i16> %v0, -+ store <8 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @udiv_v4i32_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: udiv_v4i32_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrli.w $vr0, $vr0, 3 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = udiv <4 x i32> %v0, 
-+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @udiv_v2i64_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: udiv_v2i64_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrli.d $vr0, $vr0, 3 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = udiv <2 x i64> %v0, -+ store <2 x i64> %v1, ptr %res -+ ret void -+} --- -2.20.1 - - -From 30b414d9f2eb968e9f4cc6ffc76389a6f93b2907 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Thu, 16 Nov 2023 20:05:01 +0800 -Subject: [PATCH 15/35] [LoongArch] Add codegen support for insertelement - -(cherry picked from commit f2cbd1fdf702afe31d0198c9185e08dc2b104252) ---- - .../LoongArch/LoongArchISelLowering.cpp | 82 +++++- - .../Target/LoongArch/LoongArchISelLowering.h | 1 + - .../LoongArch/LoongArchLASXInstrInfo.td | 18 ++ - .../Target/LoongArch/LoongArchLSXInstrInfo.td | 5 + - .../lasx/ir-instruction/insertelement.ll | 276 ++++++++++++++++++ - .../lsx/ir-instruction/insertelement.ll | 196 +++++++++++++ - 6 files changed, 576 insertions(+), 2 deletions(-) - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index 26e94a53b344..492339ce2151 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -237,7 +237,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - // FIXME: For BUILD_VECTOR, it is temporarily set to `Legal` here, and it - // will be `Custom` handled in the future. - setOperationAction(ISD::BUILD_VECTOR, VT, Legal); -- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); -+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); - } - for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { -@@ -267,7 +267,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - - // FIXME: Same as above. 
- setOperationAction(ISD::BUILD_VECTOR, VT, Legal); -- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); -+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); - } - for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) { -@@ -369,10 +369,20 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, - return lowerRETURNADDR(Op, DAG); - case ISD::WRITE_REGISTER: - return lowerWRITE_REGISTER(Op, DAG); -+ case ISD::INSERT_VECTOR_ELT: -+ return lowerINSERT_VECTOR_ELT(Op, DAG); - } - return SDValue(); - } - -+SDValue -+LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, -+ SelectionDAG &DAG) const { -+ if (isa(Op->getOperand(2))) -+ return Op; -+ return SDValue(); -+} -+ - SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op, - SelectionDAG &DAG) const { - -@@ -3040,6 +3050,71 @@ emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, - return SinkBB; - } - -+static MachineBasicBlock * -+emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, -+ const LoongArchSubtarget &Subtarget) { -+ unsigned InsOp; -+ unsigned HalfSize; -+ switch (MI.getOpcode()) { -+ default: -+ llvm_unreachable("Unexpected opcode"); -+ case LoongArch::PseudoXVINSGR2VR_B: -+ HalfSize = 16; -+ InsOp = LoongArch::VINSGR2VR_B; -+ break; -+ case LoongArch::PseudoXVINSGR2VR_H: -+ HalfSize = 8; -+ InsOp = LoongArch::VINSGR2VR_H; -+ break; -+ } -+ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); -+ const TargetRegisterClass *RC = &LoongArch::LASX256RegClass; -+ const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass; -+ DebugLoc DL = MI.getDebugLoc(); -+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); -+ // XDst = vector_insert XSrc, Elt, Idx -+ Register XDst = MI.getOperand(0).getReg(); -+ Register XSrc = MI.getOperand(1).getReg(); -+ Register Elt = MI.getOperand(2).getReg(); -+ unsigned Idx = MI.getOperand(3).getImm(); -+ -+ Register ScratchReg1 = XSrc; -+ if (Idx >= HalfSize) { -+ ScratchReg1 = MRI.createVirtualRegister(RC); -+ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1) -+ .addReg(XSrc) -+ .addReg(XSrc) -+ .addImm(1); -+ } -+ -+ Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC); -+ Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC); -+ BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1) -+ .addReg(ScratchReg1, 0, LoongArch::sub_128); -+ BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2) -+ .addReg(ScratchSubReg1) -+ .addReg(Elt) -+ .addImm(Idx >= HalfSize ? 
Idx - HalfSize : Idx); -+ -+ Register ScratchReg2 = XDst; -+ if (Idx >= HalfSize) -+ ScratchReg2 = MRI.createVirtualRegister(RC); -+ -+ BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2) -+ .addImm(0) -+ .addReg(ScratchSubReg2) -+ .addImm(LoongArch::sub_128); -+ -+ if (Idx >= HalfSize) -+ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst) -+ .addReg(XSrc) -+ .addReg(ScratchReg2) -+ .addImm(2); -+ -+ MI.eraseFromParent(); -+ return BB; -+} -+ - MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( - MachineInstr &MI, MachineBasicBlock *BB) const { - const TargetInstrInfo *TII = Subtarget.getInstrInfo(); -@@ -3095,6 +3170,9 @@ MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( - case LoongArch::PseudoXVBNZ_W: - case LoongArch::PseudoXVBNZ_D: - return emitVecCondBranchPseudo(MI, BB, Subtarget); -+ case LoongArch::PseudoXVINSGR2VR_B: -+ case LoongArch::PseudoXVINSGR2VR_H: -+ return emitPseudoXVINSGR2VR(MI, BB, Subtarget); - } - } - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -index 7765057ebffb..29028ff963d0 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -@@ -275,6 +275,7 @@ private: - SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const; -+ SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; - - bool isFPImmLegal(const APFloat &Imm, EVT VT, - bool ForCodeSize) const override; -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index 380206ddcf10..475565db15c9 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -1065,6 +1065,13 @@ def PseudoXVBZ_W : VecCond; - def PseudoXVBZ_D : VecCond; - def PseudoXVBZ : VecCond; - -+let usesCustomInserter = 1, Constraints = "$xd = $dst" in { -+def PseudoXVINSGR2VR_B -+ : Pseudo<(outs LASX256:$dst), (ins LASX256:$xd, GPR:$rj, uimm5:$imm)>; -+def PseudoXVINSGR2VR_H -+ : Pseudo<(outs LASX256:$dst), (ins LASX256:$xd, GPR:$rj, uimm4:$imm)>; -+} // usesCustomInserter = 1, Constraints = "$xd = $dst" -+ - } // Predicates = [HasExtLASX] - - multiclass PatXr { -@@ -1365,12 +1372,23 @@ def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa), - def : Pat<(fma v4f64:$xj, v4f64:$xk, v4f64:$xa), - (XVFMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; - -+// PseudoXVINSGR2VR_{B/H} -+def : Pat<(vector_insert v32i8:$xd, GRLenVT:$rj, uimm5:$imm), -+ (PseudoXVINSGR2VR_B v32i8:$xd, GRLenVT:$rj, uimm5:$imm)>; -+def : Pat<(vector_insert v16i16:$xd, GRLenVT:$rj, uimm4:$imm), -+ (PseudoXVINSGR2VR_H v16i16:$xd, GRLenVT:$rj, uimm4:$imm)>; -+ - // XVINSGR2VR_{W/D} - def : Pat<(vector_insert v8i32:$xd, GRLenVT:$rj, uimm3:$imm), - (XVINSGR2VR_W v8i32:$xd, GRLenVT:$rj, uimm3:$imm)>; - def : Pat<(vector_insert v4i64:$xd, GRLenVT:$rj, uimm2:$imm), - (XVINSGR2VR_D v4i64:$xd, GRLenVT:$rj, uimm2:$imm)>; - -+def : Pat<(vector_insert v8f32:$vd, FPR32:$fj, uimm3:$imm), -+ (XVINSGR2VR_W $vd, (COPY_TO_REGCLASS FPR32:$fj, GPR), uimm3:$imm)>; -+def : Pat<(vector_insert v4f64:$vd, FPR64:$fj, uimm2:$imm), -+ (XVINSGR2VR_D $vd, (COPY_TO_REGCLASS FPR64:$fj, GPR), uimm2:$imm)>; -+ - // XVPICKVE2GR_W[U] - def : Pat<(loongarch_vpick_sext_elt v8i32:$xd, uimm3:$imm, i32), - (XVPICKVE2GR_W v8i32:$xd, uimm3:$imm)>; 
-diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index 980870e34503..d8fd132a1c59 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -1462,6 +1462,11 @@ def : Pat<(vector_insert v4i32:$vd, GRLenVT:$rj, uimm2:$imm), - def : Pat<(vector_insert v2i64:$vd, GRLenVT:$rj, uimm1:$imm), - (VINSGR2VR_D v2i64:$vd, GRLenVT:$rj, uimm1:$imm)>; - -+def : Pat<(vector_insert v4f32:$vd, FPR32:$fj, uimm2:$imm), -+ (VINSGR2VR_W $vd, (COPY_TO_REGCLASS FPR32:$fj, GPR), uimm2:$imm)>; -+def : Pat<(vector_insert v2f64:$vd, FPR64:$fj, uimm1:$imm), -+ (VINSGR2VR_D $vd, (COPY_TO_REGCLASS FPR64:$fj, GPR), uimm1:$imm)>; -+ - // VPICKVE2GR_{B/H/W}[U] - def : Pat<(loongarch_vpick_sext_elt v16i8:$vd, uimm4:$imm, i8), - (VPICKVE2GR_B v16i8:$vd, uimm4:$imm)>; -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll -new file mode 100644 -index 000000000000..e571a5d2e4cf ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll -@@ -0,0 +1,276 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @insert_32xi8(ptr %src, ptr %dst, i8 %in) nounwind { -+; CHECK-LABEL: insert_32xi8: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <32 x i8>, ptr %src -+ %v_new = insertelement <32 x i8> %v, i8 %in, i32 1 -+ store <32 x i8> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_32xi8_upper(ptr %src, ptr %dst, i8 %in) nounwind { -+; CHECK-LABEL: insert_32xi8_upper: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr1, $a2, 0 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <32 x i8>, ptr %src -+ %v_new = insertelement <32 x i8> %v, i8 %in, i32 16 -+ store <32 x i8> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_16xi16(ptr %src, ptr %dst, i16 %in) nounwind { -+; CHECK-LABEL: insert_16xi16: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <16 x i16>, ptr %src -+ %v_new = insertelement <16 x i16> %v, i16 %in, i32 1 -+ store <16 x i16> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_16xi16_upper(ptr %src, ptr %dst, i16 %in) nounwind { -+; CHECK-LABEL: insert_16xi16_upper: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.h $vr1, $a2, 0 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <16 x i16>, ptr %src -+ %v_new = insertelement <16 x i16> %v, i16 %in, i32 8 -+ store <16 x i16> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_8xi32(ptr %src, ptr %dst, i32 %in) nounwind { -+; CHECK-LABEL: insert_8xi32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a2, 1 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <8 x i32>, ptr %src -+ %v_new = insertelement <8 x 
i32> %v, i32 %in, i32 1 -+ store <8 x i32> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_4xi64(ptr %src, ptr %dst, i64 %in) nounwind { -+; CHECK-LABEL: insert_4xi64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a2, 1 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <4 x i64>, ptr %src -+ %v_new = insertelement <4 x i64> %v, i64 %in, i32 1 -+ store <4 x i64> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_8xfloat(ptr %src, ptr %dst, float %in) nounwind { -+; CHECK-LABEL: insert_8xfloat: -+; CHECK: # %bb.0: -+; CHECK-NEXT: movfr2gr.s $a2, $fa0 -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a2, 1 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <8 x float>, ptr %src -+ %v_new = insertelement <8 x float> %v, float %in, i32 1 -+ store <8 x float> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_4xdouble(ptr %src, ptr %dst, double %in) nounwind { -+; CHECK-LABEL: insert_4xdouble: -+; CHECK: # %bb.0: -+; CHECK-NEXT: movfr2gr.d $a2, $fa0 -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a2, 1 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <4 x double>, ptr %src -+ %v_new = insertelement <4 x double> %v, double %in, i32 1 -+ store <4 x double> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_32xi8_idx(ptr %src, ptr %dst, i8 %in, i32 %idx) nounwind { -+; CHECK-LABEL: insert_32xi8_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -64 -+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -+; CHECK-NEXT: addi.d $fp, $sp, 64 -+; CHECK-NEXT: srli.d $a4, $sp, 5 -+; CHECK-NEXT: slli.d $sp, $a4, 5 -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvst $xr0, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a3, 4, 0 -+; CHECK-NEXT: st.b $a2, $a0, 0 -+; CHECK-NEXT: xvld $xr0, $sp, 0 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $fp, -64 -+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 64 -+; CHECK-NEXT: ret -+ %v = load volatile <32 x i8>, ptr %src -+ %v_new = insertelement <32 x i8> %v, i8 %in, i32 %idx -+ store <32 x i8> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_16xi16_idx(ptr %src, ptr %dst, i16 %in, i32 %idx) nounwind { -+; CHECK-LABEL: insert_16xi16_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -64 -+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -+; CHECK-NEXT: addi.d $fp, $sp, 64 -+; CHECK-NEXT: srli.d $a4, $sp, 5 -+; CHECK-NEXT: slli.d $sp, $a4, 5 -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvst $xr0, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a3, 4, 1 -+; CHECK-NEXT: st.h $a2, $a0, 0 -+; CHECK-NEXT: xvld $xr0, $sp, 0 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $fp, -64 -+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 64 -+; CHECK-NEXT: ret -+ %v = load volatile <16 x i16>, ptr %src -+ %v_new = insertelement <16 x i16> %v, i16 %in, i32 %idx -+ store <16 x i16> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_8xi32_idx(ptr %src, ptr %dst, i32 %in, i32 %idx) nounwind { -+; CHECK-LABEL: insert_8xi32_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d 
$sp, $sp, -64 -+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -+; CHECK-NEXT: addi.d $fp, $sp, 64 -+; CHECK-NEXT: srli.d $a4, $sp, 5 -+; CHECK-NEXT: slli.d $sp, $a4, 5 -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvst $xr0, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a3, 4, 2 -+; CHECK-NEXT: st.w $a2, $a0, 0 -+; CHECK-NEXT: xvld $xr0, $sp, 0 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $fp, -64 -+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 64 -+; CHECK-NEXT: ret -+ %v = load volatile <8 x i32>, ptr %src -+ %v_new = insertelement <8 x i32> %v, i32 %in, i32 %idx -+ store <8 x i32> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_4xi64_idx(ptr %src, ptr %dst, i64 %in, i32 %idx) nounwind { -+; CHECK-LABEL: insert_4xi64_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -64 -+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -+; CHECK-NEXT: addi.d $fp, $sp, 64 -+; CHECK-NEXT: srli.d $a4, $sp, 5 -+; CHECK-NEXT: slli.d $sp, $a4, 5 -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvst $xr0, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a3, 4, 3 -+; CHECK-NEXT: st.d $a2, $a0, 0 -+; CHECK-NEXT: xvld $xr0, $sp, 0 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $fp, -64 -+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 64 -+; CHECK-NEXT: ret -+ %v = load volatile <4 x i64>, ptr %src -+ %v_new = insertelement <4 x i64> %v, i64 %in, i32 %idx -+ store <4 x i64> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_8xfloat_idx(ptr %src, ptr %dst, float %in, i32 %idx) nounwind { -+; CHECK-LABEL: insert_8xfloat_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -64 -+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -+; CHECK-NEXT: addi.d $fp, $sp, 64 -+; CHECK-NEXT: srli.d $a3, $sp, 5 -+; CHECK-NEXT: slli.d $sp, $a3, 5 -+; CHECK-NEXT: xvld $xr1, $a0, 0 -+; CHECK-NEXT: xvst $xr1, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2 -+; CHECK-NEXT: fst.s $fa0, $a0, 0 -+; CHECK-NEXT: xvld $xr0, $sp, 0 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $fp, -64 -+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 64 -+; CHECK-NEXT: ret -+ %v = load volatile <8 x float>, ptr %src -+ %v_new = insertelement <8 x float> %v, float %in, i32 %idx -+ store <8 x float> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_4xdouble_idx(ptr %src, ptr %dst, double %in, i32 %idx) nounwind { -+; CHECK-LABEL: insert_4xdouble_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -64 -+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -+; CHECK-NEXT: addi.d $fp, $sp, 64 -+; CHECK-NEXT: srli.d $a3, $sp, 5 -+; CHECK-NEXT: slli.d $sp, $a3, 5 -+; CHECK-NEXT: xvld $xr1, $a0, 0 -+; CHECK-NEXT: xvst $xr1, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3 -+; CHECK-NEXT: fst.d $fa0, $a0, 0 -+; CHECK-NEXT: xvld $xr0, $sp, 0 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $fp, -64 -+; CHECK-NEXT: 
ld.d $fp, $sp, 48 # 8-byte Folded Reload -+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 64 -+; CHECK-NEXT: ret -+ %v = load volatile <4 x double>, ptr %src -+ %v_new = insertelement <4 x double> %v, double %in, i32 %idx -+ store <4 x double> %v_new, ptr %dst -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll -new file mode 100644 -index 000000000000..a9834591aa0e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll -@@ -0,0 +1,196 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @insert_16xi8(ptr %src, ptr %dst, i8 %ins) nounwind { -+; CHECK-LABEL: insert_16xi8: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <16 x i8>, ptr %src -+ %v_new = insertelement <16 x i8> %v, i8 %ins, i32 1 -+ store <16 x i8> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_8xi16(ptr %src, ptr %dst, i16 %ins) nounwind { -+; CHECK-LABEL: insert_8xi16: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <8 x i16>, ptr %src -+ %v_new = insertelement <8 x i16> %v, i16 %ins, i32 1 -+ store <8 x i16> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_4xi32(ptr %src, ptr %dst, i32 %ins) nounwind { -+; CHECK-LABEL: insert_4xi32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vinsgr2vr.w $vr0, $a2, 1 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <4 x i32>, ptr %src -+ %v_new = insertelement <4 x i32> %v, i32 %ins, i32 1 -+ store <4 x i32> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_2xi64(ptr %src, ptr %dst, i64 %ins) nounwind { -+; CHECK-LABEL: insert_2xi64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vinsgr2vr.d $vr0, $a2, 1 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <2 x i64>, ptr %src -+ %v_new = insertelement <2 x i64> %v, i64 %ins, i32 1 -+ store <2 x i64> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_4xfloat(ptr %src, ptr %dst, float %ins) nounwind { -+; CHECK-LABEL: insert_4xfloat: -+; CHECK: # %bb.0: -+; CHECK-NEXT: movfr2gr.s $a2, $fa0 -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vinsgr2vr.w $vr0, $a2, 1 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <4 x float>, ptr %src -+ %v_new = insertelement <4 x float> %v, float %ins, i32 1 -+ store <4 x float> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_2xdouble(ptr %src, ptr %dst, double %ins) nounwind { -+; CHECK-LABEL: insert_2xdouble: -+; CHECK: # %bb.0: -+; CHECK-NEXT: movfr2gr.d $a2, $fa0 -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vinsgr2vr.d $vr0, $a2, 1 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <2 x double>, ptr %src -+ %v_new = insertelement <2 x double> %v, double %ins, i32 1 -+ store <2 x double> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_16xi8_idx(ptr %src, ptr %dst, i8 %ins, i32 %idx) nounwind { -+; CHECK-LABEL: insert_16xi8_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -16 -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vst $vr0, $sp, 0 -+; 
CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a3, 3, 0 -+; CHECK-NEXT: st.b $a2, $a0, 0 -+; CHECK-NEXT: vld $vr0, $sp, 0 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $sp, 16 -+; CHECK-NEXT: ret -+ %v = load volatile <16 x i8>, ptr %src -+ %v_new = insertelement <16 x i8> %v, i8 %ins, i32 %idx -+ store <16 x i8> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_8xi16_idx(ptr %src, ptr %dst, i16 %ins, i32 %idx) nounwind { -+; CHECK-LABEL: insert_8xi16_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -16 -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vst $vr0, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a3, 3, 1 -+; CHECK-NEXT: st.h $a2, $a0, 0 -+; CHECK-NEXT: vld $vr0, $sp, 0 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $sp, 16 -+; CHECK-NEXT: ret -+ %v = load volatile <8 x i16>, ptr %src -+ %v_new = insertelement <8 x i16> %v, i16 %ins, i32 %idx -+ store <8 x i16> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_4xi32_idx(ptr %src, ptr %dst, i32 %ins, i32 %idx) nounwind { -+; CHECK-LABEL: insert_4xi32_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -16 -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vst $vr0, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a3, 3, 2 -+; CHECK-NEXT: st.w $a2, $a0, 0 -+; CHECK-NEXT: vld $vr0, $sp, 0 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $sp, 16 -+; CHECK-NEXT: ret -+ %v = load volatile <4 x i32>, ptr %src -+ %v_new = insertelement <4 x i32> %v, i32 %ins, i32 %idx -+ store <4 x i32> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_2xi64_idx(ptr %src, ptr %dst, i64 %ins, i32 %idx) nounwind { -+; CHECK-LABEL: insert_2xi64_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -16 -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vst $vr0, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a3, 3, 3 -+; CHECK-NEXT: st.d $a2, $a0, 0 -+; CHECK-NEXT: vld $vr0, $sp, 0 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $sp, 16 -+; CHECK-NEXT: ret -+ %v = load volatile <2 x i64>, ptr %src -+ %v_new = insertelement <2 x i64> %v, i64 %ins, i32 %idx -+ store <2 x i64> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_4xfloat_idx(ptr %src, ptr %dst, float %ins, i32 %idx) nounwind { -+; CHECK-LABEL: insert_4xfloat_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -16 -+; CHECK-NEXT: vld $vr1, $a0, 0 -+; CHECK-NEXT: vst $vr1, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a2, 3, 2 -+; CHECK-NEXT: fst.s $fa0, $a0, 0 -+; CHECK-NEXT: vld $vr0, $sp, 0 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $sp, 16 -+; CHECK-NEXT: ret -+ %v = load volatile <4 x float>, ptr %src -+ %v_new = insertelement <4 x float> %v, float %ins, i32 %idx -+ store <4 x float> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_2xdouble_idx(ptr %src, ptr %dst, double %ins, i32 %idx) nounwind { -+; CHECK-LABEL: insert_2xdouble_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -16 -+; CHECK-NEXT: vld $vr1, $a0, 0 -+; CHECK-NEXT: vst $vr1, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a2, 3, 3 -+; CHECK-NEXT: fst.d $fa0, $a0, 0 -+; CHECK-NEXT: vld $vr0, $sp, 0 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $sp, 16 -+; CHECK-NEXT: ret -+ %v = load volatile <2 x double>, ptr %src -+ %v_new = insertelement <2 x double> %v, double %ins, i32 %idx -+ store <2 x double> %v_new, ptr %dst -+ ret void 
-+} --- -2.20.1 - - -From 8c15dfb5e6c74537f5748936702e4d077d09815d Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Wed, 25 Oct 2023 17:00:32 +0800 -Subject: [PATCH 16/35] [LoongArch] Custom lowering `ISD::BUILD_VECTOR` - -(cherry picked from commit add224c0a094d20389d3659f7b6e496df461a976) ---- - .../LoongArch/LoongArchISelDAGToDAG.cpp | 52 +- - .../LoongArch/LoongArchISelLowering.cpp | 102 +++- - .../Target/LoongArch/LoongArchISelLowering.h | 1 + - .../LoongArch/LoongArchLASXInstrInfo.td | 13 + - .../Target/LoongArch/LoongArchLSXInstrInfo.td | 12 +- - .../CodeGen/LoongArch/lasx/build-vector.ll | 551 ++++++++++++++++++ - .../CodeGen/LoongArch/lsx/build-vector.ll | 376 ++++++++++++ - .../LoongArch/lsx/ir-instruction/mul.ll | 28 +- - 8 files changed, 1112 insertions(+), 23 deletions(-) - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/build-vector.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/build-vector.ll - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp -index f55184019988..01b2f720f902 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp -@@ -77,13 +77,63 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) { - return; - } - case ISD::BITCAST: { -- if (VT.is128BitVector() || VT.is512BitVector()) { -+ if (VT.is128BitVector() || VT.is256BitVector()) { - ReplaceUses(SDValue(Node, 0), Node->getOperand(0)); - CurDAG->RemoveDeadNode(Node); - return; - } - break; - } -+ case ISD::BUILD_VECTOR: { -+ // Select appropriate [x]vrepli.[bhwd] instructions for constant splats of -+ // 128/256-bit when LSX/LASX is enabled. -+ BuildVectorSDNode *BVN = cast(Node); -+ APInt SplatValue, SplatUndef; -+ unsigned SplatBitSize; -+ bool HasAnyUndefs; -+ unsigned Op; -+ EVT ViaVecTy; -+ bool Is128Vec = BVN->getValueType(0).is128BitVector(); -+ bool Is256Vec = BVN->getValueType(0).is256BitVector(); -+ -+ if (!Subtarget->hasExtLSX() || (!Is128Vec && !Is256Vec)) -+ break; -+ if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, -+ HasAnyUndefs, 8)) -+ break; -+ -+ switch (SplatBitSize) { -+ default: -+ break; -+ case 8: -+ Op = Is256Vec ? LoongArch::PseudoXVREPLI_B : LoongArch::PseudoVREPLI_B; -+ ViaVecTy = Is256Vec ? MVT::v32i8 : MVT::v16i8; -+ break; -+ case 16: -+ Op = Is256Vec ? LoongArch::PseudoXVREPLI_H : LoongArch::PseudoVREPLI_H; -+ ViaVecTy = Is256Vec ? MVT::v16i16 : MVT::v8i16; -+ break; -+ case 32: -+ Op = Is256Vec ? LoongArch::PseudoXVREPLI_W : LoongArch::PseudoVREPLI_W; -+ ViaVecTy = Is256Vec ? MVT::v8i32 : MVT::v4i32; -+ break; -+ case 64: -+ Op = Is256Vec ? LoongArch::PseudoXVREPLI_D : LoongArch::PseudoVREPLI_D; -+ ViaVecTy = Is256Vec ? MVT::v4i64 : MVT::v2i64; -+ break; -+ } -+ -+ SDNode *Res; -+ // If we have a signed 10 bit integer, we can splat it directly. -+ if (SplatValue.isSignedIntN(10)) { -+ SDValue Imm = CurDAG->getTargetConstant(SplatValue, DL, -+ ViaVecTy.getVectorElementType()); -+ Res = CurDAG->getMachineNode(Op, DL, ViaVecTy, Imm); -+ ReplaceNode(Node, Res); -+ return; -+ } -+ break; -+ } - } - - // Select the default instruction. 
-diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index 492339ce2151..1b60bfc3bddb 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -234,11 +234,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction(ISD::BITCAST, VT, Legal); - setOperationAction(ISD::UNDEF, VT, Legal); - -- // FIXME: For BUILD_VECTOR, it is temporarily set to `Legal` here, and it -- // will be `Custom` handled in the future. -- setOperationAction(ISD::BUILD_VECTOR, VT, Legal); - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); -+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - } - for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { - setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); -@@ -265,10 +263,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction(ISD::BITCAST, VT, Legal); - setOperationAction(ISD::UNDEF, VT, Legal); - -- // FIXME: Same as above. -- setOperationAction(ISD::BUILD_VECTOR, VT, Legal); - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); -+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - } - for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) { - setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); -@@ -371,10 +368,105 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, - return lowerWRITE_REGISTER(Op, DAG); - case ISD::INSERT_VECTOR_ELT: - return lowerINSERT_VECTOR_ELT(Op, DAG); -+ case ISD::BUILD_VECTOR: -+ return lowerBUILD_VECTOR(Op, DAG); - } - return SDValue(); - } - -+static bool isConstantOrUndef(const SDValue Op) { -+ if (Op->isUndef()) -+ return true; -+ if (isa(Op)) -+ return true; -+ if (isa(Op)) -+ return true; -+ return false; -+} -+ -+static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { -+ for (unsigned i = 0; i < Op->getNumOperands(); ++i) -+ if (isConstantOrUndef(Op->getOperand(i))) -+ return true; -+ return false; -+} -+ -+SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op, -+ SelectionDAG &DAG) const { -+ BuildVectorSDNode *Node = cast(Op); -+ EVT ResTy = Op->getValueType(0); -+ SDLoc DL(Op); -+ APInt SplatValue, SplatUndef; -+ unsigned SplatBitSize; -+ bool HasAnyUndefs; -+ bool Is128Vec = ResTy.is128BitVector(); -+ bool Is256Vec = ResTy.is256BitVector(); -+ -+ if ((!Subtarget.hasExtLSX() || !Is128Vec) && -+ (!Subtarget.hasExtLASX() || !Is256Vec)) -+ return SDValue(); -+ -+ if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, -+ /*MinSplatBits=*/8) && -+ SplatBitSize <= 64) { -+ // We can only cope with 8, 16, 32, or 64-bit elements. -+ if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 && -+ SplatBitSize != 64) -+ return SDValue(); -+ -+ EVT ViaVecTy; -+ -+ switch (SplatBitSize) { -+ default: -+ return SDValue(); -+ case 8: -+ ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8; -+ break; -+ case 16: -+ ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16; -+ break; -+ case 32: -+ ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32; -+ break; -+ case 64: -+ ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64; -+ break; -+ } -+ -+ // SelectionDAG::getConstant will promote SplatValue appropriately. -+ SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy); -+ -+ // Bitcast to the type we originally wanted. 
-+ if (ViaVecTy != ResTy) -+ Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result); -+ -+ return Result; -+ } -+ -+ if (DAG.isSplatValue(Op, /*AllowUndefs=*/false)) -+ return Op; -+ -+ if (!isConstantOrUndefBUILD_VECTOR(Node)) { -+ // Use INSERT_VECTOR_ELT operations rather than expand to stores. -+ // The resulting code is the same length as the expansion, but it doesn't -+ // use memory operations. -+ EVT ResTy = Node->getValueType(0); -+ -+ assert(ResTy.isVector()); -+ -+ unsigned NumElts = ResTy.getVectorNumElements(); -+ SDValue Vector = DAG.getUNDEF(ResTy); -+ for (unsigned i = 0; i < NumElts; ++i) { -+ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, -+ Node->getOperand(i), -+ DAG.getConstant(i, DL, Subtarget.getGRLenVT())); -+ } -+ return Vector; -+ } -+ -+ return SDValue(); -+} -+ - SDValue - LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, - SelectionDAG &DAG) const { -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -index 29028ff963d0..111376306374 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -@@ -276,6 +276,7 @@ private: - SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; -+ SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; - - bool isFPImmLegal(const APFloat &Imm, EVT VT, - bool ForCodeSize) const override; -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index 475565db15c9..4487152fb42b 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -33,6 +33,13 @@ def lasxsplati32 - def lasxsplati64 - : PatFrag<(ops node:$e0), - (v4i64 (build_vector node:$e0, node:$e0, node:$e0, node:$e0))>; -+def lasxsplatf32 -+ : PatFrag<(ops node:$e0), -+ (v8f32 (build_vector node:$e0, node:$e0, node:$e0, node:$e0, -+ node:$e0, node:$e0, node:$e0, node:$e0))>; -+def lasxsplatf64 -+ : PatFrag<(ops node:$e0), -+ (v4f64 (build_vector node:$e0, node:$e0, node:$e0, node:$e0))>; - - //===----------------------------------------------------------------------===// - // Instruction class templates -@@ -1411,6 +1418,12 @@ def : Pat<(loongarch_vreplve v8i32:$xj, GRLenVT:$rk), - def : Pat<(loongarch_vreplve v4i64:$xj, GRLenVT:$rk), - (XVREPLVE_D v4i64:$xj, GRLenVT:$rk)>; - -+// XVREPL128VEI_{W/D} -+def : Pat<(lasxsplatf32 FPR32:$fj), -+ (XVREPL128VEI_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)>; -+def : Pat<(lasxsplatf64 FPR64:$fj), -+ (XVREPL128VEI_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)>; -+ - // Loads/Stores - foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in { - defm : LdPat; -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index d8fd132a1c59..deac5015882d 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -141,9 +141,13 @@ def lsxsplati16 : PatFrag<(ops node:$e0), - def lsxsplati32 : PatFrag<(ops node:$e0), - (v4i32 (build_vector node:$e0, node:$e0, - node:$e0, node:$e0))>; -- - def lsxsplati64 : PatFrag<(ops node:$e0), - (v2i64 (build_vector node:$e0, node:$e0))>; -+def lsxsplatf32 : PatFrag<(ops node:$e0), -+ (v4f32 (build_vector node:$e0, node:$e0, -+ 
node:$e0, node:$e0))>; -+def lsxsplatf64 : PatFrag<(ops node:$e0), -+ (v2f64 (build_vector node:$e0, node:$e0))>; - - def to_valid_timm : SDNodeXForm(N); -@@ -1498,6 +1502,12 @@ def : Pat<(loongarch_vreplve v4i32:$vj, GRLenVT:$rk), - def : Pat<(loongarch_vreplve v2i64:$vj, GRLenVT:$rk), - (VREPLVE_D v2i64:$vj, GRLenVT:$rk)>; - -+// VREPLVEI_{W/D} -+def : Pat<(lsxsplatf32 FPR32:$fj), -+ (VREPLVEI_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)>; -+def : Pat<(lsxsplatf64 FPR64:$fj), -+ (VREPLVEI_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)>; -+ - // Loads/Stores - foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { - defm : LdPat; -diff --git a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll -new file mode 100644 -index 000000000000..6824ab5cda8d ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll -@@ -0,0 +1,551 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @buildvector_v32i8_splat(ptr %dst, i8 %a0) nounwind { -+; CHECK-LABEL: buildvector_v32i8_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvreplgr2vr.b $xr0, $a1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %insert = insertelement <32 x i8> undef, i8 %a0, i8 0 -+ %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -+ store <32 x i8> %splat, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v16i16_splat(ptr %dst, i16 %a0) nounwind { -+; CHECK-LABEL: buildvector_v16i16_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvreplgr2vr.h $xr0, $a1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %insert = insertelement <16 x i16> undef, i16 %a0, i8 0 -+ %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -+ store <16 x i16> %splat, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v8i32_splat(ptr %dst, i32 %a0) nounwind { -+; CHECK-LABEL: buildvector_v8i32_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %insert = insertelement <8 x i32> undef, i32 %a0, i8 0 -+ %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -+ store <8 x i32> %splat, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v4i64_splat(ptr %dst, i64 %a0) nounwind { -+; CHECK-LABEL: buildvector_v4i64_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvreplgr2vr.d $xr0, $a1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %insert = insertelement <4 x i64> undef, i64 %a0, i8 0 -+ %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -+ store <4 x i64> %splat, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v8f32_splat(ptr %dst, float %a0) nounwind { -+; CHECK-LABEL: buildvector_v8f32_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0 -+; CHECK-NEXT: xvrepl128vei.w $xr0, $xr0, 0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %insert = insertelement <8 x float> undef, float %a0, i8 0 -+ %splat = shufflevector <8 x float> %insert, <8 x float> undef, <8 x i32> zeroinitializer -+ store <8 x float> %splat, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v4f64_splat(ptr %dst, double %a0) nounwind { -+; CHECK-LABEL: buildvector_v4f64_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: # kill: def 
$f0_64 killed $f0_64 def $xr0 -+; CHECK-NEXT: xvrepl128vei.d $xr0, $xr0, 0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %insert = insertelement <4 x double> undef, double %a0, i8 0 -+ %splat = shufflevector <4 x double> %insert, <4 x double> undef, <4 x i32> zeroinitializer -+ store <4 x double> %splat, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v32i8_const_splat(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v32i8_const_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrepli.b $xr0, 1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <32 x i8> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v16i16_const_splat(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v16i16_const_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrepli.h $xr0, 1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <16 x i16> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v8i32_const_splat(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v8i32_const_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrepli.w $xr0, 1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <8 x i32> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v4i64_const_splat(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v4i64_const_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrepli.d $xr0, 1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <4 x i64> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v2f32_const_splat(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v2f32_const_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: lu12i.w $a1, 260096 -+; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <8 x float> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v4f64_const_splat(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v4f64_const_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: lu52i.d $a1, $zero, 1023 -+; CHECK-NEXT: xvreplgr2vr.d $xr0, $a1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <4 x double> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v32i8_const(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v32i8_const: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI12_0) -+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI12_0) -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <32 x i8> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v16i16_const(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v16i16_const: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI13_0) -+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI13_0) -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <16 x i16> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v8i32_const(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v8i32_const: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI14_0) -+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI14_0) -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <8 x i32> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v4i64_const(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v4i64_const: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI15_0) -+; CHECK-NEXT: addi.d $a1, $a1, 
%pc_lo12(.LCPI15_0) -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <4 x i64> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v2f32_const(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v2f32_const: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI16_0) -+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI16_0) -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <8 x float> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v4f64_const(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v4f64_const: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI17_0) -+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI17_0) -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <4 x double> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v32i8(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15, i8 %a16, i8 %a17, i8 %a18, i8 %a19, i8 %a20, i8 %a21, i8 %a22, i8 %a23, i8 %a24, i8 %a25, i8 %a26, i8 %a27, i8 %a28, i8 %a29, i8 %a30, i8 %a31) nounwind { -+; CHECK-LABEL: buildvector_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 0 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a3, 2 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a4, 3 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a5, 4 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a6, 5 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a7, 6 -+; CHECK-NEXT: ld.b $a1, $sp, 0 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 7 -+; CHECK-NEXT: ld.b $a1, $sp, 8 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 8 -+; CHECK-NEXT: ld.b $a1, $sp, 16 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 9 -+; CHECK-NEXT: ld.b $a1, $sp, 24 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 10 -+; CHECK-NEXT: ld.b $a1, $sp, 32 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 11 -+; CHECK-NEXT: ld.b $a1, $sp, 40 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 12 -+; CHECK-NEXT: ld.b $a1, $sp, 48 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 13 -+; CHECK-NEXT: ld.b $a1, $sp, 56 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 14 -+; CHECK-NEXT: ld.b $a1, $sp, 64 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 15 -+; CHECK-NEXT: ld.b $a1, $sp, 72 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 0 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.b $a1, $sp, 80 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 1 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.b $a1, $sp, 88 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 2 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.b $a1, $sp, 96 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 3 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.b $a1, $sp, 104 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 4 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.b $a1, $sp, 112 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 5 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.b $a1, $sp, 120 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q 
$xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 6 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.b $a1, $sp, 128 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 7 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.b $a1, $sp, 136 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 8 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.b $a1, $sp, 144 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 9 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.b $a1, $sp, 152 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 10 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.b $a1, $sp, 160 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 11 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.b $a1, $sp, 168 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 12 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.b $a1, $sp, 176 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 13 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.b $a1, $sp, 184 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 14 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.b $a1, $sp, 192 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 15 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %ins0 = insertelement <32 x i8> undef, i8 %a0, i32 0 -+ %ins1 = insertelement <32 x i8> %ins0, i8 %a1, i32 1 -+ %ins2 = insertelement <32 x i8> %ins1, i8 %a2, i32 2 -+ %ins3 = insertelement <32 x i8> %ins2, i8 %a3, i32 3 -+ %ins4 = insertelement <32 x i8> %ins3, i8 %a4, i32 4 -+ %ins5 = insertelement <32 x i8> %ins4, i8 %a5, i32 5 -+ %ins6 = insertelement <32 x i8> %ins5, i8 %a6, i32 6 -+ %ins7 = insertelement <32 x i8> %ins6, i8 %a7, i32 7 -+ %ins8 = insertelement <32 x i8> %ins7, i8 %a8, i32 8 -+ %ins9 = insertelement <32 x i8> %ins8, i8 %a9, i32 9 -+ %ins10 = insertelement <32 x i8> %ins9, i8 %a10, i32 10 -+ %ins11 = insertelement <32 x i8> %ins10, i8 %a11, i32 11 -+ %ins12 = insertelement <32 x i8> %ins11, i8 %a12, i32 12 -+ %ins13 = insertelement <32 x i8> %ins12, i8 %a13, i32 13 -+ %ins14 = insertelement <32 x i8> %ins13, i8 %a14, i32 14 -+ %ins15 = insertelement <32 x i8> %ins14, i8 %a15, i32 15 -+ %ins16 = insertelement <32 x i8> %ins15, i8 %a16, i32 16 -+ %ins17 = insertelement <32 x i8> %ins16, i8 %a17, i32 17 -+ %ins18 = insertelement <32 x i8> %ins17, i8 %a18, i32 18 -+ %ins19 = insertelement <32 x i8> %ins18, i8 %a19, i32 19 -+ %ins20 = insertelement <32 x i8> %ins19, i8 %a20, i32 20 -+ %ins21 = insertelement <32 x i8> %ins20, i8 %a21, i32 21 -+ %ins22 = insertelement <32 x i8> %ins21, i8 %a22, i32 22 -+ %ins23 = insertelement <32 x i8> %ins22, i8 %a23, i32 23 -+ %ins24 = insertelement <32 x i8> %ins23, i8 %a24, i32 24 -+ %ins25 = insertelement <32 x i8> %ins24, i8 %a25, i32 25 -+ %ins26 = insertelement <32 x i8> %ins25, i8 %a26, i32 26 -+ %ins27 = insertelement <32 x i8> %ins26, i8 %a27, i32 27 -+ 
%ins28 = insertelement <32 x i8> %ins27, i8 %a28, i32 28 -+ %ins29 = insertelement <32 x i8> %ins28, i8 %a29, i32 29 -+ %ins30 = insertelement <32 x i8> %ins29, i8 %a30, i32 30 -+ %ins31 = insertelement <32 x i8> %ins30, i8 %a31, i32 31 -+ store <32 x i8> %ins31, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v16i16(ptr %dst, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7, i16 %a8, i16 %a9, i16 %a10, i16 %a11, i16 %a12, i16 %a13, i16 %a14, i16 %a15) nounwind { -+; CHECK-LABEL: buildvector_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0 -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1 -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a3, 2 -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a4, 3 -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a5, 4 -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a6, 5 -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a7, 6 -+; CHECK-NEXT: ld.h $a1, $sp, 0 -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 7 -+; CHECK-NEXT: ld.h $a1, $sp, 8 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 0 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.h $a1, $sp, 16 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 1 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.h $a1, $sp, 24 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 2 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.h $a1, $sp, 32 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 3 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.h $a1, $sp, 40 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 4 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.h $a1, $sp, 48 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 5 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.h $a1, $sp, 56 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 6 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.h $a1, $sp, 64 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 7 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %ins0 = insertelement <16 x i16> undef, i16 %a0, i32 0 -+ %ins1 = insertelement <16 x i16> %ins0, i16 %a1, i32 1 -+ %ins2 = insertelement <16 x i16> %ins1, i16 %a2, i32 2 -+ %ins3 = insertelement <16 x i16> %ins2, i16 %a3, i32 3 -+ %ins4 = insertelement <16 x i16> %ins3, i16 %a4, i32 4 -+ %ins5 = insertelement <16 x i16> %ins4, i16 %a5, i32 5 -+ %ins6 = insertelement <16 x i16> %ins5, i16 %a6, i32 6 -+ %ins7 = insertelement <16 x i16> %ins6, i16 %a7, i32 7 -+ %ins8 = insertelement <16 x i16> %ins7, i16 %a8, i32 8 -+ %ins9 = insertelement <16 x i16> %ins8, i16 %a9, i32 9 -+ %ins10 = insertelement <16 x i16> %ins9, i16 %a10, i32 10 -+ %ins11 = insertelement <16 x i16> %ins10, i16 %a11, i32 11 -+ %ins12 = insertelement <16 x i16> %ins11, i16 %a12, i32 12 -+ %ins13 = insertelement <16 x i16> %ins12, i16 %a13, i32 13 -+ %ins14 = insertelement <16 x i16> %ins13, i16 %a14, i32 14 -+ %ins15 = insertelement <16 x i16> %ins14, i16 %a15, i32 15 -+ store <16 x i16> %ins15, ptr %dst -+ ret void -+} -+ -+define void 
@buildvector_v8i32(ptr %dst, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) nounwind { -+; CHECK-LABEL: buildvector_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 0 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a2, 1 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a3, 2 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a4, 3 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a5, 4 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a6, 5 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a7, 6 -+; CHECK-NEXT: ld.w $a1, $sp, 0 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 7 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %ins0 = insertelement <8 x i32> undef, i32 %a0, i32 0 -+ %ins1 = insertelement <8 x i32> %ins0, i32 %a1, i32 1 -+ %ins2 = insertelement <8 x i32> %ins1, i32 %a2, i32 2 -+ %ins3 = insertelement <8 x i32> %ins2, i32 %a3, i32 3 -+ %ins4 = insertelement <8 x i32> %ins3, i32 %a4, i32 4 -+ %ins5 = insertelement <8 x i32> %ins4, i32 %a5, i32 5 -+ %ins6 = insertelement <8 x i32> %ins5, i32 %a6, i32 6 -+ %ins7 = insertelement <8 x i32> %ins6, i32 %a7, i32 7 -+ store <8 x i32> %ins7, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v4i64(ptr %dst, i64 %a0, i64 %a1, i64 %a2, i64 %a3) nounwind { -+; CHECK-LABEL: buildvector_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 0 -+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a2, 1 -+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a3, 2 -+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a4, 3 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %ins0 = insertelement <4 x i64> undef, i64 %a0, i32 0 -+ %ins1 = insertelement <4 x i64> %ins0, i64 %a1, i32 1 -+ %ins2 = insertelement <4 x i64> %ins1, i64 %a2, i32 2 -+ %ins3 = insertelement <4 x i64> %ins2, i64 %a3, i32 3 -+ store <4 x i64> %ins3, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v8f32(ptr %dst, float %a0, float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7) nounwind { -+; CHECK-LABEL: buildvector_v8f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: movfr2gr.s $a1, $fa0 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 0 -+; CHECK-NEXT: movfr2gr.s $a1, $fa1 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 1 -+; CHECK-NEXT: movfr2gr.s $a1, $fa2 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 2 -+; CHECK-NEXT: movfr2gr.s $a1, $fa3 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 3 -+; CHECK-NEXT: movfr2gr.s $a1, $fa4 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 4 -+; CHECK-NEXT: movfr2gr.s $a1, $fa5 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 5 -+; CHECK-NEXT: movfr2gr.s $a1, $fa6 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 6 -+; CHECK-NEXT: movfr2gr.s $a1, $fa7 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 7 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %ins0 = insertelement <8 x float> undef, float %a0, i32 0 -+ %ins1 = insertelement <8 x float> %ins0, float %a1, i32 1 -+ %ins2 = insertelement <8 x float> %ins1, float %a2, i32 2 -+ %ins3 = insertelement <8 x float> %ins2, float %a3, i32 3 -+ %ins4 = insertelement <8 x float> %ins3, float %a4, i32 4 -+ %ins5 = insertelement <8 x float> %ins4, float %a5, i32 5 -+ %ins6 = insertelement <8 x float> %ins5, float %a6, i32 6 -+ %ins7 = insertelement <8 x float> %ins6, float %a7, i32 7 -+ store <8 x float> %ins7, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v4f64(ptr %dst, double %a0, double %a1, double %a2, double %a3) nounwind { -+; CHECK-LABEL: buildvector_v4f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: movfr2gr.d $a1, $fa0 -+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 0 -+; CHECK-NEXT: movfr2gr.d $a1, $fa1 -+; CHECK-NEXT: xvinsgr2vr.d $xr0, 
$a1, 1 -+; CHECK-NEXT: movfr2gr.d $a1, $fa2 -+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 2 -+; CHECK-NEXT: movfr2gr.d $a1, $fa3 -+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 3 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %ins0 = insertelement <4 x double> undef, double %a0, i32 0 -+ %ins1 = insertelement <4 x double> %ins0, double %a1, i32 1 -+ %ins2 = insertelement <4 x double> %ins1, double %a2, i32 2 -+ %ins3 = insertelement <4 x double> %ins2, double %a3, i32 3 -+ store <4 x double> %ins3, ptr %dst -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll -new file mode 100644 -index 000000000000..3a74db5e1acb ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll -@@ -0,0 +1,376 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @buildvector_v16i8_splat(ptr %dst, i8 %a0) nounwind { -+; CHECK-LABEL: buildvector_v16i8_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vreplgr2vr.b $vr0, $a1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %insert = insertelement <16 x i8> undef, i8 %a0, i8 0 -+ %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -+ store <16 x i8> %splat, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v8i16_splat(ptr %dst, i16 %a0) nounwind { -+; CHECK-LABEL: buildvector_v8i16_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vreplgr2vr.h $vr0, $a1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %insert = insertelement <8 x i16> undef, i16 %a0, i8 0 -+ %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -+ store <8 x i16> %splat, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v4i32_splat(ptr %dst, i32 %a0) nounwind { -+; CHECK-LABEL: buildvector_v4i32_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vreplgr2vr.w $vr0, $a1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %insert = insertelement <4 x i32> undef, i32 %a0, i8 0 -+ %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -+ store <4 x i32> %splat, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v2i64_splat(ptr %dst, i64 %a0) nounwind { -+; CHECK-LABEL: buildvector_v2i64_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vreplgr2vr.d $vr0, $a1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %insert = insertelement <2 x i64> undef, i64 %a0, i8 0 -+ %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -+ store <2 x i64> %splat, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v4f32_splat(ptr %dst, float %a0) nounwind { -+; CHECK-LABEL: buildvector_v4f32_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0 -+; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %insert = insertelement <4 x float> undef, float %a0, i8 0 -+ %splat = shufflevector <4 x float> %insert, <4 x float> undef, <4 x i32> zeroinitializer -+ store <4 x float> %splat, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v2f64_splat(ptr %dst, double %a0) nounwind { -+; CHECK-LABEL: buildvector_v2f64_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 -+; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %insert 
= insertelement <2 x double> undef, double %a0, i8 0 -+ %splat = shufflevector <2 x double> %insert, <2 x double> undef, <2 x i32> zeroinitializer -+ store <2 x double> %splat, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v16i8_const_splat(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v16i8_const_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vrepli.b $vr0, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <16 x i8> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v8i16_const_splat(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v8i16_const_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vrepli.h $vr0, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <8 x i16> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v4i32_const_splat(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v4i32_const_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vrepli.w $vr0, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <4 x i32> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v2i64_const_splat(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v2i64_const_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vrepli.d $vr0, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <2 x i64> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v2f32_const_splat(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v2f32_const_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: lu12i.w $a1, 260096 -+; CHECK-NEXT: vreplgr2vr.w $vr0, $a1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <4 x float> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v2f64_const_splat(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v2f64_const_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: lu52i.d $a1, $zero, 1023 -+; CHECK-NEXT: vreplgr2vr.d $vr0, $a1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <2 x double> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v16i8_const(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v16i8_const: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI12_0) -+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI12_0) -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <16 x i8> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v8i16_const(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v8i16_const: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI13_0) -+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI13_0) -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <8 x i16> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v4i32_const(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v4i32_const: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI14_0) -+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI14_0) -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <4 x i32> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v2i64_const(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v2i64_const: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI15_0) -+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI15_0) -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <2 x i64> , ptr %dst -+ ret void -+} -+ -+define void 
@buildvector_v2f32_const(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v2f32_const: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI16_0) -+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI16_0) -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <4 x float> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v2f64_const(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v2f64_const: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI17_0) -+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI17_0) -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <2 x double> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v16i8(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind { -+; CHECK-LABEL: buildvector_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 0 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a3, 2 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a4, 3 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a5, 4 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a6, 5 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a7, 6 -+; CHECK-NEXT: ld.b $a1, $sp, 0 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 7 -+; CHECK-NEXT: ld.b $a1, $sp, 8 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 8 -+; CHECK-NEXT: ld.b $a1, $sp, 16 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 9 -+; CHECK-NEXT: ld.b $a1, $sp, 24 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 10 -+; CHECK-NEXT: ld.b $a1, $sp, 32 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 11 -+; CHECK-NEXT: ld.b $a1, $sp, 40 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 12 -+; CHECK-NEXT: ld.b $a1, $sp, 48 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 13 -+; CHECK-NEXT: ld.b $a1, $sp, 56 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 14 -+; CHECK-NEXT: ld.b $a1, $sp, 64 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 15 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %ins0 = insertelement <16 x i8> undef, i8 %a0, i32 0 -+ %ins1 = insertelement <16 x i8> %ins0, i8 %a1, i32 1 -+ %ins2 = insertelement <16 x i8> %ins1, i8 %a2, i32 2 -+ %ins3 = insertelement <16 x i8> %ins2, i8 %a3, i32 3 -+ %ins4 = insertelement <16 x i8> %ins3, i8 %a4, i32 4 -+ %ins5 = insertelement <16 x i8> %ins4, i8 %a5, i32 5 -+ %ins6 = insertelement <16 x i8> %ins5, i8 %a6, i32 6 -+ %ins7 = insertelement <16 x i8> %ins6, i8 %a7, i32 7 -+ %ins8 = insertelement <16 x i8> %ins7, i8 %a8, i32 8 -+ %ins9 = insertelement <16 x i8> %ins8, i8 %a9, i32 9 -+ %ins10 = insertelement <16 x i8> %ins9, i8 %a10, i32 10 -+ %ins11 = insertelement <16 x i8> %ins10, i8 %a11, i32 11 -+ %ins12 = insertelement <16 x i8> %ins11, i8 %a12, i32 12 -+ %ins13 = insertelement <16 x i8> %ins12, i8 %a13, i32 13 -+ %ins14 = insertelement <16 x i8> %ins13, i8 %a14, i32 14 -+ %ins15 = insertelement <16 x i8> %ins14, i8 %a15, i32 15 -+ store <16 x i8> %ins15, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v8i16(ptr %dst, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind { -+; CHECK-LABEL: buildvector_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0 -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1 -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a3, 2 -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a4, 3 -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a5, 4 -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a6, 5 -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a7, 6 -+; CHECK-NEXT: ld.h $a1, $sp, 0 -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 
7 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %ins0 = insertelement <8 x i16> undef, i16 %a0, i32 0 -+ %ins1 = insertelement <8 x i16> %ins0, i16 %a1, i32 1 -+ %ins2 = insertelement <8 x i16> %ins1, i16 %a2, i32 2 -+ %ins3 = insertelement <8 x i16> %ins2, i16 %a3, i32 3 -+ %ins4 = insertelement <8 x i16> %ins3, i16 %a4, i32 4 -+ %ins5 = insertelement <8 x i16> %ins4, i16 %a5, i32 5 -+ %ins6 = insertelement <8 x i16> %ins5, i16 %a6, i32 6 -+ %ins7 = insertelement <8 x i16> %ins6, i16 %a7, i32 7 -+ store <8 x i16> %ins7, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v4i32(ptr %dst, i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind { -+; CHECK-LABEL: buildvector_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 0 -+; CHECK-NEXT: vinsgr2vr.w $vr0, $a2, 1 -+; CHECK-NEXT: vinsgr2vr.w $vr0, $a3, 2 -+; CHECK-NEXT: vinsgr2vr.w $vr0, $a4, 3 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %ins0 = insertelement <4 x i32> undef, i32 %a0, i32 0 -+ %ins1 = insertelement <4 x i32> %ins0, i32 %a1, i32 1 -+ %ins2 = insertelement <4 x i32> %ins1, i32 %a2, i32 2 -+ %ins3 = insertelement <4 x i32> %ins2, i32 %a3, i32 3 -+ store <4 x i32> %ins3, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v2i64(ptr %dst, i64 %a0, i64 %a1) nounwind { -+; CHECK-LABEL: buildvector_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 0 -+; CHECK-NEXT: vinsgr2vr.d $vr0, $a2, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %ins0 = insertelement <2 x i64> undef, i64 %a0, i32 0 -+ %ins1 = insertelement <2 x i64> %ins0, i64 %a1, i32 1 -+ store <2 x i64> %ins1, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v4f32(ptr %dst, float %a0, float %a1, float %a2, float %a3) nounwind { -+; CHECK-LABEL: buildvector_v4f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: movfr2gr.s $a1, $fa0 -+; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 0 -+; CHECK-NEXT: movfr2gr.s $a1, $fa1 -+; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 1 -+; CHECK-NEXT: movfr2gr.s $a1, $fa2 -+; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 2 -+; CHECK-NEXT: movfr2gr.s $a1, $fa3 -+; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 3 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %ins0 = insertelement <4 x float> undef, float %a0, i32 0 -+ %ins1 = insertelement <4 x float> %ins0, float %a1, i32 1 -+ %ins2 = insertelement <4 x float> %ins1, float %a2, i32 2 -+ %ins3 = insertelement <4 x float> %ins2, float %a3, i32 3 -+ store <4 x float> %ins3, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v2f64(ptr %dst, double %a0, double %a1) nounwind { -+; CHECK-LABEL: buildvector_v2f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: movfr2gr.d $a1, $fa0 -+; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 0 -+; CHECK-NEXT: movfr2gr.d $a1, $fa1 -+; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %ins0 = insertelement <2 x double> undef, double %a0, i32 0 -+ %ins1 = insertelement <2 x double> %ins0, double %a1, i32 1 -+ store <2 x double> %ins1, ptr %dst -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll -index 5060240cd8b1..d0be9cb7e3c8 100644 ---- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll -@@ -180,10 +180,9 @@ entry: - define void @mul_v16i8_17(ptr %res, ptr %a0) nounwind { - ; CHECK-LABEL: mul_v16i8_17: - ; CHECK: # %bb.0: # %entry --; CHECK-NEXT: ori $a2, $zero, 17 --; CHECK-NEXT: 
vreplgr2vr.b $vr0, $a2 --; CHECK-NEXT: vld $vr1, $a1, 0 --; CHECK-NEXT: vmul.b $vr0, $vr1, $vr0 -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vrepli.b $vr1, 17 -+; CHECK-NEXT: vmul.b $vr0, $vr0, $vr1 - ; CHECK-NEXT: vst $vr0, $a0, 0 - ; CHECK-NEXT: ret - entry: -@@ -196,10 +195,9 @@ entry: - define void @mul_v8i16_17(ptr %res, ptr %a0) nounwind { - ; CHECK-LABEL: mul_v8i16_17: - ; CHECK: # %bb.0: # %entry --; CHECK-NEXT: ori $a2, $zero, 17 --; CHECK-NEXT: vreplgr2vr.h $vr0, $a2 --; CHECK-NEXT: vld $vr1, $a1, 0 --; CHECK-NEXT: vmul.h $vr0, $vr1, $vr0 -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vrepli.h $vr1, 17 -+; CHECK-NEXT: vmul.h $vr0, $vr0, $vr1 - ; CHECK-NEXT: vst $vr0, $a0, 0 - ; CHECK-NEXT: ret - entry: -@@ -212,10 +210,9 @@ entry: - define void @mul_v4i32_17(ptr %res, ptr %a0) nounwind { - ; CHECK-LABEL: mul_v4i32_17: - ; CHECK: # %bb.0: # %entry --; CHECK-NEXT: ori $a2, $zero, 17 --; CHECK-NEXT: vreplgr2vr.w $vr0, $a2 --; CHECK-NEXT: vld $vr1, $a1, 0 --; CHECK-NEXT: vmul.w $vr0, $vr1, $vr0 -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vrepli.w $vr1, 17 -+; CHECK-NEXT: vmul.w $vr0, $vr0, $vr1 - ; CHECK-NEXT: vst $vr0, $a0, 0 - ; CHECK-NEXT: ret - entry: -@@ -228,10 +225,9 @@ entry: - define void @mul_v2i64_17(ptr %res, ptr %a0) nounwind { - ; CHECK-LABEL: mul_v2i64_17: - ; CHECK: # %bb.0: # %entry --; CHECK-NEXT: ori $a2, $zero, 17 --; CHECK-NEXT: vreplgr2vr.d $vr0, $a2 --; CHECK-NEXT: vld $vr1, $a1, 0 --; CHECK-NEXT: vmul.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vrepli.d $vr1, 17 -+; CHECK-NEXT: vmul.d $vr0, $vr0, $vr1 - ; CHECK-NEXT: vst $vr0, $a0, 0 - ; CHECK-NEXT: ret - entry: --- -2.20.1 - - -From 62970fc545cedb4640ded25af832fd233c16dc85 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Tue, 14 Nov 2023 17:58:52 +0800 -Subject: [PATCH 17/35] [LoongArch] Add more and/or/xor patterns for vector - types - -(cherry picked from commit ca66df3b021017fedf08f0779f5bfc7898dbdd29) ---- - .../LoongArch/LoongArchLASXInstrInfo.td | 21 +-- - .../Target/LoongArch/LoongArchLSXInstrInfo.td | 21 +-- - .../LoongArch/lasx/ir-instruction/and.ll | 125 ++++++++++++++++++ - .../LoongArch/lasx/ir-instruction/or.ll | 125 ++++++++++++++++++ - .../LoongArch/lasx/ir-instruction/xor.ll | 125 ++++++++++++++++++ - .../LoongArch/lsx/ir-instruction/and.ll | 125 ++++++++++++++++++ - .../LoongArch/lsx/ir-instruction/or.ll | 125 ++++++++++++++++++ - .../LoongArch/lsx/ir-instruction/xor.ll | 125 ++++++++++++++++++ - 8 files changed, 774 insertions(+), 18 deletions(-) - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll - -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index 4487152fb42b..a5652472481a 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -1184,10 +1184,6 @@ multiclass PatShiftXrUimm { - (!cast(Inst#"_D") LASX256:$xj, uimm6:$imm)>; - } - --class PatXrXrB -- : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), -- (Inst LASX256:$xj, LASX256:$xk)>; -- - let Predicates = [HasExtLASX] in { - - // XVADD_{B/H/W/D} -@@ -1235,13 
+1231,20 @@ defm : PatXrXr; - defm : PatXrXrU; - - // XVAND_V --def : PatXrXrB; --// XVNOR_V --def : PatXrXrB; -+foreach vt = [v32i8, v16i16, v8i32, v4i64] in -+def : Pat<(and (vt LASX256:$xj), (vt LASX256:$xk)), -+ (XVAND_V LASX256:$xj, LASX256:$xk)>; -+// XVOR_V -+foreach vt = [v32i8, v16i16, v8i32, v4i64] in -+def : Pat<(or (vt LASX256:$xj), (vt LASX256:$xk)), -+ (XVOR_V LASX256:$xj, LASX256:$xk)>; - // XVXOR_V --def : PatXrXrB; -+foreach vt = [v32i8, v16i16, v8i32, v4i64] in -+def : Pat<(xor (vt LASX256:$xj), (vt LASX256:$xk)), -+ (XVXOR_V LASX256:$xj, LASX256:$xk)>; - // XVNOR_V --def : Pat<(vnot (or (v32i8 LASX256:$xj), (v32i8 LASX256:$xk))), -+foreach vt = [v32i8, v16i16, v8i32, v4i64] in -+def : Pat<(vnot (or (vt LASX256:$xj), (vt LASX256:$xk))), - (XVNOR_V LASX256:$xj, LASX256:$xk)>; - - // XVANDI_B -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index deac5015882d..5645ce51194a 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -1261,10 +1261,6 @@ multiclass PatShiftVrUimm { - (!cast(Inst#"_D") LSX128:$vj, uimm6:$imm)>; - } - --class PatVrVrB -- : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), -- (Inst LSX128:$vj, LSX128:$vk)>; -- - let Predicates = [HasExtLSX] in { - - // VADD_{B/H/W/D} -@@ -1312,13 +1308,20 @@ defm : PatVrVr; - defm : PatVrVrU; - - // VAND_V --def : PatVrVrB; --// VNOR_V --def : PatVrVrB; -+foreach vt = [v16i8, v8i16, v4i32, v2i64] in -+def : Pat<(and (vt LSX128:$vj), (vt LSX128:$vk)), -+ (VAND_V LSX128:$vj, LSX128:$vk)>; -+// VOR_V -+foreach vt = [v16i8, v8i16, v4i32, v2i64] in -+def : Pat<(or (vt LSX128:$vj), (vt LSX128:$vk)), -+ (VOR_V LSX128:$vj, LSX128:$vk)>; - // VXOR_V --def : PatVrVrB; -+foreach vt = [v16i8, v8i16, v4i32, v2i64] in -+def : Pat<(xor (vt LSX128:$vj), (vt LSX128:$vk)), -+ (VXOR_V LSX128:$vj, LSX128:$vk)>; - // VNOR_V --def : Pat<(vnot (or (v16i8 LSX128:$vj), (v16i8 LSX128:$vk))), -+foreach vt = [v16i8, v8i16, v4i32, v2i64] in -+def : Pat<(vnot (or (vt LSX128:$vj), (vt LSX128:$vk))), - (VNOR_V LSX128:$vj, LSX128:$vk)>; - - // VANDI_B -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll -new file mode 100644 -index 000000000000..98c87cadeeb5 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll -@@ -0,0 +1,125 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @and_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: and_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvand.v $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %v2 = and <32 x i8> %v0, %v1 -+ store <32 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @and_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: and_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvand.v $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %v2 = and <16 x i16> %v0, %v1 -+ store <16 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @and_v8i32(ptr %res, ptr 
%a0, ptr %a1) nounwind { -+; CHECK-LABEL: and_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvand.v $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %v2 = and <8 x i32> %v0, %v1 -+ store <8 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @and_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: and_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvand.v $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %v2 = and <4 x i64> %v0, %v1 -+ store <4 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @and_u_v32i8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: and_u_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvandi.b $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = and <32 x i8> %v0, -+ store <32 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @and_u_v16i16(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: and_u_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvrepli.h $xr1, 31 -+; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = and <16 x i16> %v0, -+ store <16 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @and_u_v8i32(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: and_u_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvrepli.w $xr1, 31 -+; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = and <8 x i32> %v0, -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @and_u_v4i64(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: and_u_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvrepli.d $xr1, 31 -+; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = and <4 x i64> %v0, -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll -new file mode 100644 -index 000000000000..f37cbf1cefed ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll -@@ -0,0 +1,125 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @or_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: or_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %v2 = or <32 x i8> %v0, %v1 -+ store <32 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @or_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: or_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 -+; CHECK-NEXT: 
xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %v2 = or <16 x i16> %v0, %v1 -+ store <16 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @or_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: or_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %v2 = or <8 x i32> %v0, %v1 -+ store <8 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @or_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: or_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %v2 = or <4 x i64> %v0, %v1 -+ store <4 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @or_u_v32i8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: or_u_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvori.b $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = or <32 x i8> %v0, -+ store <32 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @or_u_v16i16(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: or_u_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvrepli.h $xr1, 31 -+; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = or <16 x i16> %v0, -+ store <16 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @or_u_v8i32(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: or_u_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvrepli.w $xr1, 31 -+; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = or <8 x i32> %v0, -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @or_u_v4i64(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: or_u_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvrepli.d $xr1, 31 -+; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = or <4 x i64> %v0, -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll -new file mode 100644 -index 000000000000..c2fb1462b7a2 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll -@@ -0,0 +1,125 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @xor_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: xor_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvxor.v $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %v2 = xor <32 x i8> %v0, %v1 -+ store <32 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void 
@xor_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: xor_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvxor.v $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %v2 = xor <16 x i16> %v0, %v1 -+ store <16 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @xor_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: xor_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvxor.v $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %v2 = xor <8 x i32> %v0, %v1 -+ store <8 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @xor_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: xor_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvxor.v $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %v2 = xor <4 x i64> %v0, %v1 -+ store <4 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @xor_u_v32i8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: xor_u_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvxori.b $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = xor <32 x i8> %v0, -+ store <32 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @xor_u_v16i16(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: xor_u_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvrepli.h $xr1, 31 -+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = xor <16 x i16> %v0, -+ store <16 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @xor_u_v8i32(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: xor_u_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvrepli.w $xr1, 31 -+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = xor <8 x i32> %v0, -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @xor_u_v4i64(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: xor_u_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvrepli.d $xr1, 31 -+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = xor <4 x i64> %v0, -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll -new file mode 100644 -index 000000000000..523255159a81 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll -@@ -0,0 +1,125 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @and_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: and_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vand.v 
$vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %v2 = and <16 x i8> %v0, %v1 -+ store <16 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @and_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: and_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %v2 = and <8 x i16> %v0, %v1 -+ store <8 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @and_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: and_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %v2 = and <4 x i32> %v0, %v1 -+ store <4 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @and_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: and_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %v2 = and <2 x i64> %v0, %v1 -+ store <2 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @and_u_v16i8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: and_u_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vandi.b $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = and <16 x i8> %v0, -+ store <16 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @and_u_v8i16(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: and_u_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vrepli.h $vr1, 31 -+; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = and <8 x i16> %v0, -+ store <8 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @and_u_v4i32(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: and_u_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vrepli.w $vr1, 31 -+; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = and <4 x i32> %v0, -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @and_u_v2i64(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: and_u_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vrepli.d $vr1, 31 -+; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = and <2 x i64> %v0, -+ store <2 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll -new file mode 100644 -index 000000000000..f124512acce7 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll -@@ -0,0 +1,125 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define 
void @or_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: or_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %v2 = or <16 x i8> %v0, %v1 -+ store <16 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @or_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: or_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %v2 = or <8 x i16> %v0, %v1 -+ store <8 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @or_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: or_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %v2 = or <4 x i32> %v0, %v1 -+ store <4 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @or_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: or_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %v2 = or <2 x i64> %v0, %v1 -+ store <2 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @or_u_v16i8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: or_u_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vori.b $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = or <16 x i8> %v0, -+ store <16 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @or_u_v8i16(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: or_u_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vrepli.h $vr1, 31 -+; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = or <8 x i16> %v0, -+ store <8 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @or_u_v4i32(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: or_u_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vrepli.w $vr1, 31 -+; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = or <4 x i32> %v0, -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @or_u_v2i64(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: or_u_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vrepli.d $vr1, 31 -+; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = or <2 x i64> %v0, -+ store <2 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll -new file mode 100644 -index 000000000000..ce3e49c990ff ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll -@@ -0,0 +1,125 @@ -+; NOTE: 
Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @xor_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: xor_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %v2 = xor <16 x i8> %v0, %v1 -+ store <16 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @xor_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: xor_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %v2 = xor <8 x i16> %v0, %v1 -+ store <8 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @xor_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: xor_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %v2 = xor <4 x i32> %v0, %v1 -+ store <4 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @xor_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: xor_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %v2 = xor <2 x i64> %v0, %v1 -+ store <2 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @xor_u_v16i8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: xor_u_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vxori.b $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = xor <16 x i8> %v0, -+ store <16 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @xor_u_v8i16(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: xor_u_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vrepli.h $vr1, 31 -+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = xor <8 x i16> %v0, -+ store <8 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @xor_u_v4i32(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: xor_u_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vrepli.w $vr1, 31 -+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = xor <4 x i32> %v0, -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @xor_u_v2i64(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: xor_u_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vrepli.d $vr1, 31 -+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = xor <2 x i64> %v0, -+ store <2 x i64> %v1, ptr %res -+ ret void -+} --- -2.20.1 - - -From f33b8ed69368098a23f9f14a1d3b8d62aca8b48f Mon Sep 17 
00:00:00 2001 -From: leecheechen -Date: Fri, 1 Dec 2023 13:14:11 +0800 -Subject: [PATCH 18/35] [LoongArch] Add some binary IR instructions testcases - for LASX (#74031) - -The IR instructions include: -- Binary Operations: add fadd sub fsub mul fmul udiv sdiv fdiv -- Bitwise Binary Operations: shl lshr ashr - -(cherry picked from commit dbbc7c31c8e55d72dc243b244e386a25132e7215) ---- - .../LoongArch/lasx/ir-instruction/add.ll | 122 +++++++++ - .../LoongArch/lasx/ir-instruction/ashr.ll | 178 +++++++++++++ - .../LoongArch/lasx/ir-instruction/fadd.ll | 34 +++ - .../LoongArch/lasx/ir-instruction/fdiv.ll | 34 +++ - .../LoongArch/lasx/ir-instruction/fmul.ll | 34 +++ - .../LoongArch/lasx/ir-instruction/fsub.ll | 34 +++ - .../LoongArch/lasx/ir-instruction/lshr.ll | 178 +++++++++++++ - .../LoongArch/lasx/ir-instruction/mul.ll | 238 ++++++++++++++++++ - .../LoongArch/lasx/ir-instruction/sdiv.ll | 134 ++++++++++ - .../LoongArch/lasx/ir-instruction/shl.ll | 178 +++++++++++++ - .../LoongArch/lasx/ir-instruction/sub.ll | 122 +++++++++ - .../LoongArch/lasx/ir-instruction/udiv.ll | 122 +++++++++ - 12 files changed, 1408 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll - -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll -new file mode 100644 -index 000000000000..8e4d0dc6f1c3 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll -@@ -0,0 +1,122 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @add_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: add_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvadd.b $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %v2 = add <32 x i8> %v0, %v1 -+ store <32 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @add_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: add_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvadd.h $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %v2 = add <16 x i16> %v0, %v1 -+ store <16 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @add_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: add_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 
0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvadd.w $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %v2 = add <8 x i32> %v0, %v1 -+ store <8 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @add_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: add_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvadd.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %v2 = add <4 x i64> %v0, %v1 -+ store <4 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @add_v32i8_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: add_v32i8_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvaddi.bu $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = add <32 x i8> %v0, -+ store <32 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @add_v16i16_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: add_v16i16_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvaddi.hu $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = add <16 x i16> %v0, -+ store <16 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @add_v8i32_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: add_v8i32_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvaddi.wu $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = add <8 x i32> %v0, -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @add_v4i64_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: add_v4i64_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvaddi.du $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = add <4 x i64> %v0, -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll -new file mode 100644 -index 000000000000..fcbf0f1400fe ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll -@@ -0,0 +1,178 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @ashr_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: ashr_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsra.b $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %v2 = ashr <32 x i8> %v0, %v1 -+ store <32 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @ashr_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: ashr_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsra.h $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %v2 = ashr <16 x i16> %v0, %v1 -+ store <16 x i16> %v2, ptr %res -+ ret 
void -+} -+ -+define void @ashr_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: ashr_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsra.w $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %v2 = ashr <8 x i32> %v0, %v1 -+ store <8 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @ashr_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: ashr_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsra.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %v2 = ashr <4 x i64> %v0, %v1 -+ store <4 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @ashr_v32i8_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: ashr_v32i8_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrai.b $xr0, $xr0, 1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = ashr <32 x i8> %v0, -+ store <32 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @ashr_v32i8_7(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: ashr_v32i8_7: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrai.b $xr0, $xr0, 7 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = ashr <32 x i8> %v0, -+ store <32 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @ashr_v16i16_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: ashr_v16i16_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrai.h $xr0, $xr0, 1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = ashr <16 x i16> %v0, -+ store <16 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @ashr_v16i16_15(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: ashr_v16i16_15: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrai.h $xr0, $xr0, 15 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = ashr <16 x i16> %v0, -+ store <16 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @ashr_v8i32_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: ashr_v8i32_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrai.w $xr0, $xr0, 1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = ashr <8 x i32> %v0, -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @ashr_v8i32_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: ashr_v8i32_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrai.w $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = ashr <8 x i32> %v0, -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @ashr_v4i64_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: ashr_v4i64_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrai.d $xr0, $xr0, 1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = ashr <4 x i64> %v0, -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -+ -+define void @ashr_v4i64_63(ptr %res, ptr %a0) nounwind { 
-+; CHECK-LABEL: ashr_v4i64_63: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrai.d $xr0, $xr0, 63 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = ashr <4 x i64> %v0, -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll -new file mode 100644 -index 000000000000..365bb305fc5a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll -@@ -0,0 +1,34 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @fadd_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: fadd_v8f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfadd.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = fadd <8 x float> %v0, %v1 -+ store <8 x float> %v2, ptr %res -+ ret void -+} -+ -+define void @fadd_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: fadd_v4f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfadd.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = fadd <4 x double> %v0, %v1 -+ store <4 x double> %v2, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll -new file mode 100644 -index 000000000000..284121a79a49 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll -@@ -0,0 +1,34 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @fdiv_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: fdiv_v8f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfdiv.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = fdiv <8 x float> %v0, %v1 -+ store <8 x float> %v2, ptr %res -+ ret void -+} -+ -+define void @fdiv_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: fdiv_v4f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfdiv.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = fdiv <4 x double> %v0, %v1 -+ store <4 x double> %v2, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll -new file mode 100644 -index 000000000000..a48dca8d2847 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll -@@ -0,0 +1,34 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @fmul_v8f32(ptr %res, ptr %a0, ptr %a1) 
nounwind { -+; CHECK-LABEL: fmul_v8f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfmul.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = fmul <8 x float> %v0, %v1 -+ store <8 x float> %v2, ptr %res -+ ret void -+} -+ -+define void @fmul_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: fmul_v4f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfmul.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = fmul <4 x double> %v0, %v1 -+ store <4 x double> %v2, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll -new file mode 100644 -index 000000000000..6164aa5a55c7 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll -@@ -0,0 +1,34 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @fsub_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: fsub_v8f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfsub.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = fsub <8 x float> %v0, %v1 -+ store <8 x float> %v2, ptr %res -+ ret void -+} -+ -+define void @fsub_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: fsub_v4f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfsub.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = fsub <4 x double> %v0, %v1 -+ store <4 x double> %v2, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll -new file mode 100644 -index 000000000000..24be69d8032a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll -@@ -0,0 +1,178 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @lshr_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: lshr_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsrl.b $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %v2 = lshr <32 x i8> %v0, %v1 -+ store <32 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @lshr_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: lshr_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsrl.h $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %v2 = lshr <16 x i16> %v0, %v1 -+ store <16 x i16> %v2, ptr 
%res -+ ret void -+} -+ -+define void @lshr_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: lshr_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsrl.w $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %v2 = lshr <8 x i32> %v0, %v1 -+ store <8 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @lshr_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: lshr_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsrl.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %v2 = lshr <4 x i64> %v0, %v1 -+ store <4 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @lshr_v32i8_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: lshr_v32i8_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrli.b $xr0, $xr0, 1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = lshr <32 x i8> %v0, -+ store <32 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @lshr_v32i8_7(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: lshr_v32i8_7: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrli.b $xr0, $xr0, 7 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = lshr <32 x i8> %v0, -+ store <32 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @lshr_v16i16_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: lshr_v16i16_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrli.h $xr0, $xr0, 1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = lshr <16 x i16> %v0, -+ store <16 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @lshr_v16i16_15(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: lshr_v16i16_15: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrli.h $xr0, $xr0, 15 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = lshr <16 x i16> %v0, -+ store <16 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @lshr_v8i32_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: lshr_v8i32_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrli.w $xr0, $xr0, 1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = lshr <8 x i32> %v0, -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @lshr_v8i32_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: lshr_v8i32_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrli.w $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = lshr <8 x i32> %v0, -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @lshr_v4i64_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: lshr_v4i64_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrli.d $xr0, $xr0, 1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = lshr <4 x i64> %v0, -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -+ -+define void @lshr_v4i64_63(ptr %res, ptr 
%a0) nounwind { -+; CHECK-LABEL: lshr_v4i64_63: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrli.d $xr0, $xr0, 63 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = lshr <4 x i64> %v0, -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll -new file mode 100644 -index 000000000000..dcb893caa255 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll -@@ -0,0 +1,238 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @mul_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mul_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvmul.b $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %v2 = mul <32 x i8> %v0, %v1 -+ store <32 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @mul_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mul_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvmul.h $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %v2 = mul <16 x i16> %v0, %v1 -+ store <16 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @mul_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mul_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvmul.w $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %v2 = mul <8 x i32> %v0, %v1 -+ store <8 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @mul_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mul_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvmul.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %v2 = mul <4 x i64> %v0, %v1 -+ store <4 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @mul_square_v32i8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_square_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvmul.b $xr0, $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = mul <32 x i8> %v0, %v0 -+ store <32 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_square_v16i16(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_square_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvmul.h $xr0, $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = mul <16 x i16> %v0, %v0 -+ store <16 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_square_v8i32(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_square_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvmul.w $xr0, $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 
0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = mul <8 x i32> %v0, %v0 -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_square_v4i64(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_square_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvmul.d $xr0, $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = mul <4 x i64> %v0, %v0 -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_v32i8_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_v32i8_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslli.b $xr0, $xr0, 3 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = mul <32 x i8> %v0, -+ store <32 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_v16i16_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_v16i16_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslli.h $xr0, $xr0, 3 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = mul <16 x i16> %v0, -+ store <16 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_v8i32_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_v8i32_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslli.w $xr0, $xr0, 3 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = mul <8 x i32> %v0, -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_v4i64_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_v4i64_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslli.d $xr0, $xr0, 3 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = mul <4 x i64> %v0, -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_v32i8_17(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_v32i8_17: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvrepli.b $xr1, 17 -+; CHECK-NEXT: xvmul.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = mul <32 x i8> %v0, -+ store <32 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_v16i16_17(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_v16i16_17: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvrepli.h $xr1, 17 -+; CHECK-NEXT: xvmul.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = mul <16 x i16> %v0, -+ store <16 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_v8i32_17(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_v8i32_17: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvrepli.w $xr1, 17 -+; CHECK-NEXT: xvmul.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = mul <8 x i32> %v0, -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_v4i64_17(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_v4i64_17: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvrepli.d $xr1, 17 -+; CHECK-NEXT: xvmul.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = mul <4 x 
i64> %v0, -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll -new file mode 100644 -index 000000000000..e3635a5f14a2 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll -@@ -0,0 +1,134 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @sdiv_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: sdiv_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvdiv.b $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %v2 = sdiv <32 x i8> %v0, %v1 -+ store <32 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @sdiv_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: sdiv_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvdiv.h $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %v2 = sdiv <16 x i16> %v0, %v1 -+ store <16 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @sdiv_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: sdiv_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvdiv.w $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %v2 = sdiv <8 x i32> %v0, %v1 -+ store <8 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @sdiv_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: sdiv_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvdiv.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %v2 = sdiv <4 x i64> %v0, %v1 -+ store <4 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @sdiv_v32i8_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sdiv_v32i8_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrai.b $xr1, $xr0, 7 -+; CHECK-NEXT: xvsrli.b $xr1, $xr1, 5 -+; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvsrai.b $xr0, $xr0, 3 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = sdiv <32 x i8> %v0, -+ store <32 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @sdiv_v16i16_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sdiv_v16i16_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrai.h $xr1, $xr0, 15 -+; CHECK-NEXT: xvsrli.h $xr1, $xr1, 13 -+; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvsrai.h $xr0, $xr0, 3 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = sdiv <16 x i16> %v0, -+ store <16 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @sdiv_v8i32_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sdiv_v8i32_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrai.w $xr1, $xr0, 31 -+; CHECK-NEXT: xvsrli.w $xr1, $xr1, 29 -+; CHECK-NEXT: xvadd.w $xr0, $xr0, 
$xr1 -+; CHECK-NEXT: xvsrai.w $xr0, $xr0, 3 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = sdiv <8 x i32> %v0, -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @sdiv_v4i64_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sdiv_v4i64_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrai.d $xr1, $xr0, 63 -+; CHECK-NEXT: xvsrli.d $xr1, $xr1, 61 -+; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvsrai.d $xr0, $xr0, 3 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = sdiv <4 x i64> %v0, -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll -new file mode 100644 -index 000000000000..8a02c7e3ac97 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll -@@ -0,0 +1,178 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @shl_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: shl_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsll.b $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %v2 = shl <32 x i8> %v0, %v1 -+ store <32 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @shl_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: shl_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsll.h $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %v2 = shl <16 x i16> %v0, %v1 -+ store <16 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @shl_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: shl_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsll.w $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %v2 = shl <8 x i32> %v0, %v1 -+ store <8 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @shl_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: shl_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsll.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %v2 = shl <4 x i64> %v0, %v1 -+ store <4 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @shl_v32i8_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: shl_v32i8_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslli.b $xr0, $xr0, 1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = shl <32 x i8> %v0, -+ store <32 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @shl_v32i8_7(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: shl_v32i8_7: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslli.b $xr0, $xr0, 7 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: 
-+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = shl <32 x i8> %v0, -+ store <32 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @shl_v16i16_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: shl_v16i16_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslli.h $xr0, $xr0, 1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = shl <16 x i16> %v0, -+ store <16 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @shl_v16i16_15(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: shl_v16i16_15: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslli.h $xr0, $xr0, 15 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = shl <16 x i16> %v0, -+ store <16 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @shl_v8i32_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: shl_v8i32_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslli.w $xr0, $xr0, 1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = shl <8 x i32> %v0, -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @shl_v8i32_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: shl_v8i32_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslli.w $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = shl <8 x i32> %v0, -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @shl_v4i64_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: shl_v4i64_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslli.d $xr0, $xr0, 1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = shl <4 x i64> %v0, -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -+ -+define void @shl_v4i64_63(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: shl_v4i64_63: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslli.d $xr0, $xr0, 63 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = shl <4 x i64> %v0, -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll -new file mode 100644 -index 000000000000..bcfff1651477 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll -@@ -0,0 +1,122 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @sub_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: sub_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsub.b $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %v2 = sub <32 x i8> %v0, %v1 -+ store <32 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @sub_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: sub_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsub.h $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = 
load <16 x i16>, ptr %a1 -+ %v2 = sub <16 x i16> %v0, %v1 -+ store <16 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @sub_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: sub_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsub.w $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %v2 = sub <8 x i32> %v0, %v1 -+ store <8 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @sub_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: sub_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsub.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %v2 = sub <4 x i64> %v0, %v1 -+ store <4 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @sub_v32i8_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sub_v32i8_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsubi.bu $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = sub <32 x i8> %v0, -+ store <32 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @sub_v16i16_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sub_v16i16_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsubi.hu $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = sub <16 x i16> %v0, -+ store <16 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @sub_v8i32_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sub_v8i32_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsubi.wu $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = sub <8 x i32> %v0, -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @sub_v4i64_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sub_v4i64_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsubi.du $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = sub <4 x i64> %v0, -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll -new file mode 100644 -index 000000000000..e78084c7186d ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll -@@ -0,0 +1,122 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @udiv_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: udiv_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvdiv.bu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %v2 = udiv <32 x i8> %v0, %v1 -+ store <32 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @udiv_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: udiv_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: 
xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvdiv.hu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %v2 = udiv <16 x i16> %v0, %v1 -+ store <16 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @udiv_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: udiv_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvdiv.wu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %v2 = udiv <8 x i32> %v0, %v1 -+ store <8 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @udiv_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: udiv_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvdiv.du $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %v2 = udiv <4 x i64> %v0, %v1 -+ store <4 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @udiv_v32i8_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: udiv_v32i8_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrli.b $xr0, $xr0, 3 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = udiv <32 x i8> %v0, -+ store <32 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @udiv_v16i16_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: udiv_v16i16_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrli.h $xr0, $xr0, 3 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = udiv <16 x i16> %v0, -+ store <16 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @udiv_v8i32_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: udiv_v8i32_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrli.w $xr0, $xr0, 3 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = udiv <8 x i32> %v0, -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @udiv_v4i64_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: udiv_v4i64_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrli.d $xr0, $xr0, 3 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = udiv <4 x i64> %v0, -+ store <4 x i64> %v1, ptr %res -+ ret void -+} --- -2.20.1 - - -From 1b20d45ced302fa921b54294758687bc2c1df220 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Sat, 2 Dec 2023 14:25:17 +0800 -Subject: [PATCH 19/35] [LoongArch] Override TargetLowering::isShuffleMaskLegal - - By default, `isShuffleMaskLegal` always returns true, which can result - in the expansion of `BUILD_VECTOR` into a `VECTOR_SHUFFLE` node in - certain situations. Subsequently, the `VECTOR_SHUFFLE` node is expanded - again into a `BUILD_VECTOR`, leading to an infinite loop. - To address this, we always return false, allowing the expansion of - `BUILD_VECTOR` through the stack. 
- -(cherry picked from commit 66a3e4fafb6eae19764f8a192ca3a116c0554211) ---- - .../LoongArch/LoongArchISelLowering.cpp | 10 +++++++++ - .../Target/LoongArch/LoongArchISelLowering.h | 5 +++++ - .../CodeGen/LoongArch/lsx/build-vector.ll | 22 +++++++++++++++++++ - 3 files changed, 37 insertions(+) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index 1b60bfc3bddb..e45f21265d7b 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -239,6 +239,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - } - for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { -+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); - setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); - setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT, - Legal); -@@ -268,6 +269,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - } - for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) { -+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); - setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); - setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT, - Legal); -@@ -370,10 +372,18 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, - return lowerINSERT_VECTOR_ELT(Op, DAG); - case ISD::BUILD_VECTOR: - return lowerBUILD_VECTOR(Op, DAG); -+ case ISD::VECTOR_SHUFFLE: -+ return lowerVECTOR_SHUFFLE(Op, DAG); - } - return SDValue(); - } - -+SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, -+ SelectionDAG &DAG) const { -+ // TODO: custom shuffle. -+ return SDValue(); -+} -+ - static bool isConstantOrUndef(const SDValue Op) { - if (Op->isUndef()) - return true; -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -index 111376306374..2c35f9e5d378 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -@@ -230,6 +230,10 @@ public: - MachineMemOperand::Flags Flags = MachineMemOperand::MONone, - unsigned *Fast = nullptr) const override; - -+ bool isShuffleMaskLegal(ArrayRef Mask, EVT VT) const override { -+ return false; -+ } -+ - private: - /// Target-specific function used to lower LoongArch calling conventions. - typedef bool LoongArchCCAssignFn(const DataLayout &DL, LoongArchABI::ABI ABI, -@@ -277,6 +281,7 @@ private: - SDValue lowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; -+ SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; - - bool isFPImmLegal(const APFloat &Imm, EVT VT, - bool ForCodeSize) const override; -diff --git a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll -index 3a74db5e1acb..ed1f610a5fa6 100644 ---- a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll -+++ b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll -@@ -374,3 +374,25 @@ entry: - store <2 x double> %ins1, ptr %dst - ret void - } -+ -+;; BUILD_VECTOR through stack. -+;; If `isShuffleMaskLegal` returns true, it will lead to an infinite loop. 
-+define void @extract1_i32_zext_insert0_i64_undef(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: extract1_i32_zext_insert0_i64_undef: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -16 -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1 -+; CHECK-NEXT: bstrpick.d $a0, $a0, 31, 0 -+; CHECK-NEXT: st.d $a0, $sp, 0 -+; CHECK-NEXT: vld $vr0, $sp, 0 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $sp, 16 -+; CHECK-NEXT: ret -+ %v = load volatile <4 x i32>, ptr %src -+ %e = extractelement <4 x i32> %v, i32 1 -+ %z = zext i32 %e to i64 -+ %r = insertelement <2 x i64> undef, i64 %z, i32 0 -+ store <2 x i64> %r, ptr %dst -+ ret void -+} --- -2.20.1 - - -From aa1ff5f878a37004975a017d84b2e87df0ea8235 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Sat, 2 Dec 2023 16:24:33 +0800 -Subject: [PATCH 20/35] Reland "[LoongArch] Support CTLZ with lsx/lasx" - -This patch simultaneously adds tests for `CTPOP`. - -This relands 07cec73dcd095035257eec1f213d273b10988130 with fix tests. - -(cherry picked from commit a60a5421b60be1bce0272385fa16846ada5eed5e) ---- - .../LoongArch/LoongArchISelLowering.cpp | 13 +- - .../LoongArch/LoongArchLASXInstrInfo.td | 11 +- - .../Target/LoongArch/LoongArchLSXInstrInfo.td | 11 +- - .../test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll | 115 ++++++++++++++++++ - llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll | 115 ++++++++++++++++++ - 5 files changed, 255 insertions(+), 10 deletions(-) - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index e45f21265d7b..358263b1a258 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -247,7 +247,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - VT, Legal); - setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal); - setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); -- setOperationAction(ISD::CTPOP, VT, Legal); -+ setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal); - } - for (MVT VT : {MVT::v4f32, MVT::v2f64}) { - setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); -@@ -277,7 +277,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - VT, Legal); - setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal); - setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); -- setOperationAction(ISD::CTPOP, VT, Legal); -+ setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal); - } - for (MVT VT : {MVT::v8f32, MVT::v4f64}) { - setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); -@@ -2800,6 +2800,15 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, - case Intrinsic::loongarch_lasx_xvsrai_d: - return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<6>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vclz_b: -+ case Intrinsic::loongarch_lsx_vclz_h: -+ case Intrinsic::loongarch_lsx_vclz_w: -+ case Intrinsic::loongarch_lsx_vclz_d: -+ case Intrinsic::loongarch_lasx_xvclz_b: -+ case Intrinsic::loongarch_lasx_xvclz_h: -+ case Intrinsic::loongarch_lasx_xvclz_w: -+ case Intrinsic::loongarch_lasx_xvclz_d: -+ return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1)); - case Intrinsic::loongarch_lsx_vpcnt_b: - case Intrinsic::loongarch_lsx_vpcnt_h: - case Intrinsic::loongarch_lsx_vpcnt_w: -diff --git 
a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index a5652472481a..960ac627578c 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -1273,6 +1273,9 @@ defm : PatXrXr; - defm : PatShiftXrXr; - defm : PatShiftXrUimm; - -+// XVCLZ_{B/H/W/D} -+defm : PatXr; -+ - // XVPCNT_{B/H/W/D} - defm : PatXr; - -@@ -1590,26 +1593,26 @@ foreach Inst = ["XVMADDWEV_Q_D", "XVMADDWOD_Q_D", "XVMADDWEV_Q_DU", - // (LAInst vty:$xj)>; - foreach Inst = ["XVEXTH_H_B", "XVEXTH_HU_BU", - "XVMSKLTZ_B", "XVMSKGEZ_B", "XVMSKNZ_B", -- "XVCLO_B", "XVCLZ_B", "VEXT2XV_H_B", "VEXT2XV_HU_BU", -+ "XVCLO_B", "VEXT2XV_H_B", "VEXT2XV_HU_BU", - "VEXT2XV_W_B", "VEXT2XV_WU_BU", "VEXT2XV_D_B", - "VEXT2XV_DU_BU", "XVREPLVE0_B", "XVREPLVE0_Q"] in - def : Pat<(deriveLASXIntrinsic.ret (v32i8 LASX256:$xj)), - (!cast(Inst) LASX256:$xj)>; - foreach Inst = ["XVEXTH_W_H", "XVEXTH_WU_HU", "XVMSKLTZ_H", -- "XVCLO_H", "XVCLZ_H", "XVFCVTL_S_H", "XVFCVTH_S_H", -+ "XVCLO_H", "XVFCVTL_S_H", "XVFCVTH_S_H", - "VEXT2XV_W_H", "VEXT2XV_WU_HU", "VEXT2XV_D_H", - "VEXT2XV_DU_HU", "XVREPLVE0_H"] in - def : Pat<(deriveLASXIntrinsic.ret (v16i16 LASX256:$xj)), - (!cast(Inst) LASX256:$xj)>; - foreach Inst = ["XVEXTH_D_W", "XVEXTH_DU_WU", "XVMSKLTZ_W", -- "XVCLO_W", "XVCLZ_W", "XVFFINT_S_W", "XVFFINT_S_WU", -+ "XVCLO_W", "XVFFINT_S_W", "XVFFINT_S_WU", - "XVFFINTL_D_W", "XVFFINTH_D_W", - "VEXT2XV_D_W", "VEXT2XV_DU_WU", "XVREPLVE0_W"] in - def : Pat<(deriveLASXIntrinsic.ret (v8i32 LASX256:$xj)), - (!cast(Inst) LASX256:$xj)>; - foreach Inst = ["XVEXTH_Q_D", "XVEXTH_QU_DU", "XVMSKLTZ_D", - "XVEXTL_Q_D", "XVEXTL_QU_DU", -- "XVCLO_D", "XVCLZ_D", "XVFFINT_D_L", "XVFFINT_D_LU", -+ "XVCLO_D", "XVFFINT_D_L", "XVFFINT_D_LU", - "XVREPLVE0_D"] in - def : Pat<(deriveLASXIntrinsic.ret (v4i64 LASX256:$xj)), - (!cast(Inst) LASX256:$xj)>; -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index 5645ce51194a..3480ade9eebf 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -1350,6 +1350,9 @@ defm : PatVrVr; - defm : PatShiftVrVr; - defm : PatShiftVrUimm; - -+// VCLZ_{B/H/W/D} -+defm : PatVr; -+ - // VPCNT_{B/H/W/D} - defm : PatVr; - -@@ -1674,21 +1677,21 @@ foreach Inst = ["VMADDWEV_Q_D", "VMADDWOD_Q_D", "VMADDWEV_Q_DU", - // (LAInst vty:$vj)>; - foreach Inst = ["VEXTH_H_B", "VEXTH_HU_BU", - "VMSKLTZ_B", "VMSKGEZ_B", "VMSKNZ_B", -- "VCLO_B", "VCLZ_B"] in -+ "VCLO_B"] in - def : Pat<(deriveLSXIntrinsic.ret (v16i8 LSX128:$vj)), - (!cast(Inst) LSX128:$vj)>; - foreach Inst = ["VEXTH_W_H", "VEXTH_WU_HU", "VMSKLTZ_H", -- "VCLO_H", "VCLZ_H", "VFCVTL_S_H", "VFCVTH_S_H"] in -+ "VCLO_H", "VFCVTL_S_H", "VFCVTH_S_H"] in - def : Pat<(deriveLSXIntrinsic.ret (v8i16 LSX128:$vj)), - (!cast(Inst) LSX128:$vj)>; - foreach Inst = ["VEXTH_D_W", "VEXTH_DU_WU", "VMSKLTZ_W", -- "VCLO_W", "VCLZ_W", "VFFINT_S_W", "VFFINT_S_WU", -+ "VCLO_W", "VFFINT_S_W", "VFFINT_S_WU", - "VFFINTL_D_W", "VFFINTH_D_W"] in - def : Pat<(deriveLSXIntrinsic.ret (v4i32 LSX128:$vj)), - (!cast(Inst) LSX128:$vj)>; - foreach Inst = ["VEXTH_Q_D", "VEXTH_QU_DU", "VMSKLTZ_D", - "VEXTL_Q_D", "VEXTL_QU_DU", -- "VCLO_D", "VCLZ_D", "VFFINT_D_L", "VFFINT_D_LU"] in -+ "VCLO_D", "VFFINT_D_L", "VFFINT_D_LU"] in - def : Pat<(deriveLSXIntrinsic.ret (v2i64 LSX128:$vj)), - (!cast(Inst) LSX128:$vj)>; - -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll 
b/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll -new file mode 100644 -index 000000000000..7786e399c95f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll -@@ -0,0 +1,115 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @ctpop_v32i8(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: ctpop_v32i8: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvpcnt.b $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load <32 x i8>, ptr %src -+ %res = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %v) -+ store <32 x i8> %res, ptr %dst -+ ret void -+} -+ -+define void @ctpop_v16i16(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: ctpop_v16i16: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvpcnt.h $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load <16 x i16>, ptr %src -+ %res = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %v) -+ store <16 x i16> %res, ptr %dst -+ ret void -+} -+ -+define void @ctpop_v8i32(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: ctpop_v8i32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvpcnt.w $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load <8 x i32>, ptr %src -+ %res = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %v) -+ store <8 x i32> %res, ptr %dst -+ ret void -+} -+ -+define void @ctpop_v4i64(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: ctpop_v4i64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvpcnt.d $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load <4 x i64>, ptr %src -+ %res = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %v) -+ store <4 x i64> %res, ptr %dst -+ ret void -+} -+ -+define void @ctlz_v32i8(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: ctlz_v32i8: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvclz.b $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load <32 x i8>, ptr %src -+ %res = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %v, i1 false) -+ store <32 x i8> %res, ptr %dst -+ ret void -+} -+ -+define void @ctlz_v16i16(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: ctlz_v16i16: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvclz.h $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load <16 x i16>, ptr %src -+ %res = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %v, i1 false) -+ store <16 x i16> %res, ptr %dst -+ ret void -+} -+ -+define void @ctlz_v8i32(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: ctlz_v8i32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvclz.w $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load <8 x i32>, ptr %src -+ %res = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %v, i1 false) -+ store <8 x i32> %res, ptr %dst -+ ret void -+} -+ -+define void @ctlz_v4i64(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: ctlz_v4i64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvclz.d $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load <4 x i64>, ptr %src -+ %res = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %v, i1 false) -+ store <4 x i64> %res, ptr %dst -+ ret void -+} -+ -+declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>) -+declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>) -+declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) -+declare <4 x i64> 
@llvm.ctpop.v4i64(<4 x i64>) -+declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>, i1) -+declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1) -+declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1) -+declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll b/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll -new file mode 100644 -index 000000000000..5df553fba7ef ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll -@@ -0,0 +1,115 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @ctpop_v16i8(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: ctpop_v16i8: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vpcnt.b $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load <16 x i8>, ptr %src -+ %res = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %v) -+ store <16 x i8> %res, ptr %dst -+ ret void -+} -+ -+define void @ctpop_v8i16(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: ctpop_v8i16: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vpcnt.h $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load <8 x i16>, ptr %src -+ %res = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %v) -+ store <8 x i16> %res, ptr %dst -+ ret void -+} -+ -+define void @ctpop_v4i32(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: ctpop_v4i32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vpcnt.w $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load <4 x i32>, ptr %src -+ %res = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %v) -+ store <4 x i32> %res, ptr %dst -+ ret void -+} -+ -+define void @ctpop_v2i64(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: ctpop_v2i64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vpcnt.d $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load <2 x i64>, ptr %src -+ %res = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %v) -+ store <2 x i64> %res, ptr %dst -+ ret void -+} -+ -+define void @ctlz_v16i8(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: ctlz_v16i8: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vclz.b $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load <16 x i8>, ptr %src -+ %res = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %v, i1 false) -+ store <16 x i8> %res, ptr %dst -+ ret void -+} -+ -+define void @ctlz_v8i16(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: ctlz_v8i16: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vclz.h $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load <8 x i16>, ptr %src -+ %res = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %v, i1 false) -+ store <8 x i16> %res, ptr %dst -+ ret void -+} -+ -+define void @ctlz_v4i32(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: ctlz_v4i32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vclz.w $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load <4 x i32>, ptr %src -+ %res = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %v, i1 false) -+ store <4 x i32> %res, ptr %dst -+ ret void -+} -+ -+define void @ctlz_v2i64(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: ctlz_v2i64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vclz.d $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load <2 x i64>, ptr %src -+ %res = call <2 x i64> 
@llvm.ctlz.v2i64(<2 x i64> %v, i1 false) -+ store <2 x i64> %res, ptr %dst -+ ret void -+} -+ -+declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) -+declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>) -+declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) -+declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) -+declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) -+declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) -+declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) -+declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) --- -2.20.1 - - -From aa55afe1a5c74c325f009c58f48645107fd95e11 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Mon, 4 Dec 2023 10:44:39 +0800 -Subject: [PATCH 21/35] [LoongArch] Support MULHS/MULHU with lsx/lasx - -Mark MULHS/MULHU nodes as legal and adds the necessary patterns. - -(cherry picked from commit e9cd197d15300f186a5a32092103add65fbd3f50) ---- - .../LoongArch/LoongArchISelLowering.cpp | 2 + - .../LoongArch/LoongArchLASXInstrInfo.td | 4 + - .../Target/LoongArch/LoongArchLSXInstrInfo.td | 4 + - llvm/test/CodeGen/LoongArch/lasx/mulh.ll | 162 ++++++++++++++++++ - llvm/test/CodeGen/LoongArch/lsx/mulh.ll | 162 ++++++++++++++++++ - 5 files changed, 334 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/mulh.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/mulh.ll - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index 358263b1a258..3d8d6898a4d5 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -248,6 +248,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal); - setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); - setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal); -+ setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal); - } - for (MVT VT : {MVT::v4f32, MVT::v2f64}) { - setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); -@@ -278,6 +279,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal); - setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); - setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal); -+ setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal); - } - for (MVT VT : {MVT::v8f32, MVT::v4f64}) { - setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index 960ac627578c..240f28b0dc5a 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -1217,6 +1217,10 @@ defm : PatXrUimm5; - // XVMUL_{B/H/W/D} - defm : PatXrXr; - -+// XVMUH_{B/H/W/D}[U] -+defm : PatXrXr; -+defm : PatXrXrU; -+ - // XVMADD_{B/H/W/D} - defm : PatXrXrXr; - // XVMSUB_{B/H/W/D} -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index 3480ade9eebf..fb4726c530b5 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -1294,6 +1294,10 @@ defm : PatVrUimm5; - // VMUL_{B/H/W/D} - defm : PatVrVr; - -+// VMUH_{B/H/W/D}[U] -+defm : PatVrVr; -+defm : PatVrVrU; -+ - // VMADD_{B/H/W/D} - defm : PatVrVrVr; - // VMSUB_{B/H/W/D} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/mulh.ll b/llvm/test/CodeGen/LoongArch/lasx/mulh.ll -new 
file mode 100644 -index 000000000000..aac711a4a371 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/mulh.ll -@@ -0,0 +1,162 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @mulhs_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mulhs_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvmuh.b $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %v0s = sext <32 x i8> %v0 to <32 x i16> -+ %v1s = sext <32 x i8> %v1 to <32 x i16> -+ %m = mul <32 x i16> %v0s, %v1s -+ %s = ashr <32 x i16> %m, -+ %v2 = trunc <32 x i16> %s to <32 x i8> -+ store <32 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @mulhu_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mulhu_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvmuh.bu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %v0z = zext <32 x i8> %v0 to <32 x i16> -+ %v1z = zext <32 x i8> %v1 to <32 x i16> -+ %m = mul <32 x i16> %v0z, %v1z -+ %s = lshr <32 x i16> %m, -+ %v2 = trunc <32 x i16> %s to <32 x i8> -+ store <32 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @mulhs_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mulhs_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvmuh.h $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %v0s = sext <16 x i16> %v0 to <16 x i32> -+ %v1s = sext <16 x i16> %v1 to <16 x i32> -+ %m = mul <16 x i32> %v0s, %v1s -+ %s = ashr <16 x i32> %m, -+ %v2 = trunc <16 x i32> %s to <16 x i16> -+ store <16 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @mulhu_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mulhu_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvmuh.hu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %v0z = zext <16 x i16> %v0 to <16 x i32> -+ %v1z = zext <16 x i16> %v1 to <16 x i32> -+ %m = mul <16 x i32> %v0z, %v1z -+ %s = lshr <16 x i32> %m, -+ %v2 = trunc <16 x i32> %s to <16 x i16> -+ store <16 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @mulhs_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mulhs_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvmuh.w $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %v0s = sext <8 x i32> %v0 to <8 x i64> -+ %v1s = sext <8 x i32> %v1 to <8 x i64> -+ %m = mul <8 x i64> %v0s, %v1s -+ %s = ashr <8 x i64> %m, -+ %v2 = trunc <8 x i64> %s to <8 x i32> -+ store <8 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @mulhu_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mulhu_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvmuh.wu $xr0, $xr1, 
$xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %v0z = zext <8 x i32> %v0 to <8 x i64> -+ %v1z = zext <8 x i32> %v1 to <8 x i64> -+ %m = mul <8 x i64> %v0z, %v1z -+ %s = lshr <8 x i64> %m, -+ %v2 = trunc <8 x i64> %s to <8 x i32> -+ store <8 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @mulhs_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mulhs_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvmuh.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %v0s = sext <4 x i64> %v0 to <4 x i128> -+ %v1s = sext <4 x i64> %v1 to <4 x i128> -+ %m = mul <4 x i128> %v0s, %v1s -+ %s = ashr <4 x i128> %m, -+ %v2 = trunc <4 x i128> %s to <4 x i64> -+ store <4 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @mulhu_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mulhu_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvmuh.du $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %v0z = zext <4 x i64> %v0 to <4 x i128> -+ %v1z = zext <4 x i64> %v1 to <4 x i128> -+ %m = mul <4 x i128> %v0z, %v1z -+ %s = lshr <4 x i128> %m, -+ %v2 = trunc <4 x i128> %s to <4 x i64> -+ store <4 x i64> %v2, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/mulh.ll b/llvm/test/CodeGen/LoongArch/lsx/mulh.ll -new file mode 100644 -index 000000000000..e1388f00e355 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/mulh.ll -@@ -0,0 +1,162 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @mulhs_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mulhs_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vmuh.b $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %v0s = sext <16 x i8> %v0 to <16 x i16> -+ %v1s = sext <16 x i8> %v1 to <16 x i16> -+ %m = mul <16 x i16> %v0s, %v1s -+ %s = ashr <16 x i16> %m, -+ %v2 = trunc <16 x i16> %s to <16 x i8> -+ store <16 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @mulhu_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mulhu_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vmuh.bu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %v0z = zext <16 x i8> %v0 to <16 x i16> -+ %v1z = zext <16 x i8> %v1 to <16 x i16> -+ %m = mul <16 x i16> %v0z, %v1z -+ %s = lshr <16 x i16> %m, -+ %v2 = trunc <16 x i16> %s to <16 x i8> -+ store <16 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @mulhs_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mulhs_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vmuh.h $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %v0s = sext <8 x i16> %v0 
to <8 x i32> -+ %v1s = sext <8 x i16> %v1 to <8 x i32> -+ %m = mul <8 x i32> %v0s, %v1s -+ %s = ashr <8 x i32> %m, -+ %v2 = trunc <8 x i32> %s to <8 x i16> -+ store <8 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @mulhu_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mulhu_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vmuh.hu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %v0z = zext <8 x i16> %v0 to <8 x i32> -+ %v1z = zext <8 x i16> %v1 to <8 x i32> -+ %m = mul <8 x i32> %v0z, %v1z -+ %s = lshr <8 x i32> %m, -+ %v2 = trunc <8 x i32> %s to <8 x i16> -+ store <8 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @mulhs_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mulhs_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vmuh.w $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %v0s = sext <4 x i32> %v0 to <4 x i64> -+ %v1s = sext <4 x i32> %v1 to <4 x i64> -+ %m = mul <4 x i64> %v0s, %v1s -+ %s = ashr <4 x i64> %m, -+ %v2 = trunc <4 x i64> %s to <4 x i32> -+ store <4 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @mulhu_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mulhu_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vmuh.wu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %v0z = zext <4 x i32> %v0 to <4 x i64> -+ %v1z = zext <4 x i32> %v1 to <4 x i64> -+ %m = mul <4 x i64> %v0z, %v1z -+ %s = lshr <4 x i64> %m, -+ %v2 = trunc <4 x i64> %s to <4 x i32> -+ store <4 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @mulhs_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mulhs_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vmuh.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %v0s = sext <2 x i64> %v0 to <2 x i128> -+ %v1s = sext <2 x i64> %v1 to <2 x i128> -+ %m = mul <2 x i128> %v0s, %v1s -+ %s = ashr <2 x i128> %m, -+ %v2 = trunc <2 x i128> %s to <2 x i64> -+ store <2 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @mulhu_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mulhu_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vmuh.du $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %v0z = zext <2 x i64> %v0 to <2 x i128> -+ %v1z = zext <2 x i64> %v1 to <2 x i128> -+ %m = mul <2 x i128> %v0z, %v1z -+ %s = lshr <2 x i128> %m, -+ %v2 = trunc <2 x i128> %s to <2 x i64> -+ store <2 x i64> %v2, ptr %res -+ ret void -+} --- -2.20.1 - - -From 7d2d996fdab4fa9279318174f5b8042cc7ace0a6 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Wed, 6 Dec 2023 16:43:38 +0800 -Subject: [PATCH 22/35] [LoongArch] Make ISD::VSELECT a legal operation with - lsx/lasx - -(cherry picked from commit de21308f78f3b0f0910638dbdac90967150d19f0) ---- - .../LoongArch/LoongArchISelLowering.cpp | 5 ++ - 
.../LoongArch/LoongArchLASXInstrInfo.td | 8 ++ - .../Target/LoongArch/LoongArchLSXInstrInfo.td | 8 ++ - llvm/test/CodeGen/LoongArch/lasx/vselect.ll | 86 +++++++++++++++++++ - llvm/test/CodeGen/LoongArch/lsx/vselect.ll | 86 +++++++++++++++++++ - 5 files changed, 193 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/vselect.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/vselect.ll - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index 3d8d6898a4d5..229251987ae4 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -237,6 +237,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); -+ -+ setOperationAction(ISD::VSELECT, VT, Legal); - } - for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); -@@ -268,6 +270,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); -+ -+ setOperationAction(ISD::VSELECT, VT, Legal); - } - for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) { - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); -@@ -305,6 +309,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setStackPointerRegisterToSaveRestore(LoongArch::R3); - - setBooleanContents(ZeroOrOneBooleanContent); -+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); - - setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen()); - -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index 240f28b0dc5a..0bd8db1bfdf0 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -1480,6 +1480,14 @@ def : Pat<(f32 (vector_extract v8f32:$xj, i64:$rk)), - def : Pat<(f64 (vector_extract v4f64:$xj, i64:$rk)), - (f64 (EXTRACT_SUBREG (XVREPLVE_D v4f64:$xj, i64:$rk), sub_64))>; - -+// vselect -+def : Pat<(v32i8 (vselect LASX256:$xj, LASX256:$xd, -+ (v32i8 (SplatPat_uimm8 uimm8:$imm)))), -+ (XVBITSELI_B LASX256:$xd, LASX256:$xj, uimm8:$imm)>; -+foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in -+ def : Pat<(vt (vselect LASX256:$xa, LASX256:$xk, LASX256:$xj)), -+ (XVBITSEL_V LASX256:$xj, LASX256:$xk, LASX256:$xa)>; -+ - } // Predicates = [HasExtLASX] - - /// Intrinsic pattern -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index fb4726c530b5..5800ff6f6266 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -1564,6 +1564,14 @@ def : Pat<(f32 (vector_extract v4f32:$vj, i64:$rk)), - def : Pat<(f64 (vector_extract v2f64:$vj, i64:$rk)), - (f64 (EXTRACT_SUBREG (VREPLVE_D v2f64:$vj, i64:$rk), sub_64))>; - -+// vselect -+def : Pat<(v16i8 (vselect LSX128:$vj, LSX128:$vd, -+ (v16i8 (SplatPat_uimm8 uimm8:$imm)))), -+ (VBITSELI_B LSX128:$vd, LSX128:$vj, uimm8:$imm)>; -+foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in -+ def : Pat<(vt (vselect LSX128:$va, LSX128:$vk, LSX128:$vj)), -+ 
(VBITSEL_V LSX128:$vj, LSX128:$vk, LSX128:$va)>; -+ - } // Predicates = [HasExtLSX] - - /// Intrinsic pattern -diff --git a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll -new file mode 100644 -index 000000000000..24f4bcf752d3 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll -@@ -0,0 +1,86 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @select_v32i8_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: select_v32i8_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvrepli.h $xr1, -256 -+; CHECK-NEXT: xvbitseli.b $xr0, $xr1, 1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <32 x i8>, ptr %a0 -+ %sel = select <32 x i1> , <32 x i8> %v0, <32 x i8> -+ store <32 x i8> %sel, ptr %res -+ ret void -+} -+ -+define void @select_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: select_v32i8: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvrepli.h $xr2, -256 -+; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %sel = select <32 x i1> , <32 x i8> %v0, <32 x i8> %v1 -+ store <32 x i8> %sel, ptr %res -+ ret void -+} -+ -+define void @select_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: select_v16i16: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: lu12i.w $a1, -16 -+; CHECK-NEXT: lu32i.d $a1, 0 -+; CHECK-NEXT: xvreplgr2vr.w $xr2, $a1 -+; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %sel = select <16 x i1> , <16 x i16> %v0, <16 x i16> %v1 -+ store <16 x i16> %sel, ptr %res -+ ret void -+} -+ -+define void @select_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: select_v8i32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: ori $a1, $zero, 0 -+; CHECK-NEXT: lu32i.d $a1, -1 -+; CHECK-NEXT: xvreplgr2vr.d $xr2, $a1 -+; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %sel = select <8 x i1> , <8 x i32> %v0, <8 x i32> %v1 -+ store <8 x i32> %sel, ptr %res -+ ret void -+} -+ -+define void @select_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: select_v4i64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_0) -+; CHECK-NEXT: addi.d $a3, $a3, %pc_lo12(.LCPI4_0) -+; CHECK-NEXT: xvld $xr0, $a3, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvld $xr2, $a2, 0 -+; CHECK-NEXT: xvbitsel.v $xr0, $xr2, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %sel = select <4 x i1> , <4 x i64> %v0, <4 x i64> %v1 -+ store <4 x i64> %sel, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll -new file mode 100644 -index 000000000000..00e3d9313f13 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll -@@ -0,0 +1,86 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < 
%s | FileCheck %s -+ -+define void @select_v16i8_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: select_v16i8_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vrepli.h $vr1, -256 -+; CHECK-NEXT: vbitseli.b $vr0, $vr1, 255 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i8>, ptr %a0 -+ %sel = select <16 x i1> , <16 x i8> %v0, <16 x i8> -+ store <16 x i8> %sel, ptr %res -+ ret void -+} -+ -+define void @select_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: select_v16i8: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vrepli.h $vr2, -256 -+; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %sel = select <16 x i1> , <16 x i8> %v0, <16 x i8> %v1 -+ store <16 x i8> %sel, ptr %res -+ ret void -+} -+ -+define void @select_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: select_v8i16: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: lu12i.w $a1, -16 -+; CHECK-NEXT: lu32i.d $a1, 0 -+; CHECK-NEXT: vreplgr2vr.w $vr2, $a1 -+; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %sel = select <8 x i1> , <8 x i16> %v0, <8 x i16> %v1 -+ store <8 x i16> %sel, ptr %res -+ ret void -+} -+ -+define void @select_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: select_v4i32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: ori $a1, $zero, 0 -+; CHECK-NEXT: lu32i.d $a1, -1 -+; CHECK-NEXT: vreplgr2vr.d $vr2, $a1 -+; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %sel = select <4 x i1> , <4 x i32> %v0, <4 x i32> %v1 -+ store <4 x i32> %sel, ptr %res -+ ret void -+} -+ -+define void @select_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: select_v2i64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_0) -+; CHECK-NEXT: addi.d $a3, $a3, %pc_lo12(.LCPI4_0) -+; CHECK-NEXT: vld $vr0, $a3, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vld $vr2, $a2, 0 -+; CHECK-NEXT: vbitsel.v $vr0, $vr2, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %sel = select <2 x i1> , <2 x i64> %v0, <2 x i64> %v1 -+ store <2 x i64> %sel, ptr %res -+ ret void -+} --- -2.20.1 - - -From 051e8cc8c17b13c4cb5ccd81038a305580fe3228 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Thu, 7 Dec 2023 20:11:43 +0800 -Subject: [PATCH 23/35] [LoongArch] Add codegen support for icmp/fcmp with - lsx/lasx fetaures (#74700) - -Mark ISD::SETCC node as legal, and add handling for the vector types -condition codes. 
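In TargetLowering terms, the change boils down to marking ISD::SETCC legal for the LSX/LASX vector types and expanding the condition codes that are not selected directly. The following is a condensed, illustrative sketch of the LoongArchISelLowering.cpp hunk below, not verbatim patch content:

    // Condensed illustration of the hunk that follows; the real patch applies
    // this inside the per-vector-type loops for both LSX (128-bit) and
    // LASX (256-bit) types.
    setOperationAction(ISD::SETCC, VT, Legal);
    // Integer vectors: EQ and the signed/unsigned LE/LT forms are selected
    // via the vseq/vsle/vslt patterns added in the .td files; the remaining
    // codes are expanded by generic legalization.
    setCondCodeAction({ISD::SETNE, ISD::SETGE, ISD::SETGT,
                       ISD::SETUGE, ISD::SETUGT}, VT, Expand);
    // FP vectors: the GE/GT-style codes are expanded, so generic code can
    // rewrite them (e.g. by swapping operands to reuse the LT/LE compares).
    setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
                       ISD::SETUGE, ISD::SETUGT}, VT, Expand);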
- -(cherry picked from commit 9ff7d0ebeb54347f9006405a6d08ed2b713bc411) ---- - .../LoongArch/LoongArchISelLowering.cpp | 14 + - .../LoongArch/LoongArchLASXInstrInfo.td | 95 ++ - .../Target/LoongArch/LoongArchLSXInstrInfo.td | 95 ++ - .../LoongArch/lasx/ir-instruction/fcmp.ll | 692 +++++++++++++ - .../LoongArch/lasx/ir-instruction/icmp.ll | 939 ++++++++++++++++++ - .../LoongArch/lsx/ir-instruction/fcmp.ll | 692 +++++++++++++ - .../LoongArch/lsx/ir-instruction/icmp.ll | 939 ++++++++++++++++++ - 7 files changed, 3466 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index 229251987ae4..3d5ae6d3deda 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -238,6 +238,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - -+ setOperationAction(ISD::SETCC, VT, Legal); - setOperationAction(ISD::VSELECT, VT, Legal); - } - for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { -@@ -251,11 +252,17 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); - setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal); - setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal); -+ setCondCodeAction( -+ {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, -+ Expand); - } - for (MVT VT : {MVT::v4f32, MVT::v2f64}) { - setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); - setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); - setOperationAction(ISD::FMA, VT, Legal); -+ setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, -+ ISD::SETUGE, ISD::SETUGT}, -+ VT, Expand); - } - } - -@@ -271,6 +278,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - -+ setOperationAction(ISD::SETCC, VT, Legal); - setOperationAction(ISD::VSELECT, VT, Legal); - } - for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) { -@@ -284,11 +292,17 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); - setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal); - setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal); -+ setCondCodeAction( -+ {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, -+ Expand); - } - for (MVT VT : {MVT::v8f32, MVT::v4f64}) { - setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); - setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); - setOperationAction(ISD::FMA, VT, Legal); -+ setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, -+ ISD::SETUGE, ISD::SETUGT}, -+ VT, Expand); - } - } - -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index 0bd8db1bfdf0..a9bf65c6840d 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ 
b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -1184,6 +1184,65 @@ multiclass PatShiftXrUimm { - (!cast(Inst#"_D") LASX256:$xj, uimm6:$imm)>; - } - -+multiclass PatCCXrSimm5 { -+ def : Pat<(v32i8 (setcc (v32i8 LASX256:$xj), -+ (v32i8 (SplatPat_simm5 simm5:$imm)), CC)), -+ (!cast(Inst#"_B") LASX256:$xj, simm5:$imm)>; -+ def : Pat<(v16i16 (setcc (v16i16 LASX256:$xj), -+ (v16i16 (SplatPat_simm5 simm5:$imm)), CC)), -+ (!cast(Inst#"_H") LASX256:$xj, simm5:$imm)>; -+ def : Pat<(v8i32 (setcc (v8i32 LASX256:$xj), -+ (v8i32 (SplatPat_simm5 simm5:$imm)), CC)), -+ (!cast(Inst#"_W") LASX256:$xj, simm5:$imm)>; -+ def : Pat<(v4i64 (setcc (v4i64 LASX256:$xj), -+ (v4i64 (SplatPat_simm5 simm5:$imm)), CC)), -+ (!cast(Inst#"_D") LASX256:$xj, simm5:$imm)>; -+} -+ -+multiclass PatCCXrUimm5 { -+ def : Pat<(v32i8 (setcc (v32i8 LASX256:$xj), -+ (v32i8 (SplatPat_uimm5 uimm5:$imm)), CC)), -+ (!cast(Inst#"_BU") LASX256:$xj, uimm5:$imm)>; -+ def : Pat<(v16i16 (setcc (v16i16 LASX256:$xj), -+ (v16i16 (SplatPat_uimm5 uimm5:$imm)), CC)), -+ (!cast(Inst#"_HU") LASX256:$xj, uimm5:$imm)>; -+ def : Pat<(v8i32 (setcc (v8i32 LASX256:$xj), -+ (v8i32 (SplatPat_uimm5 uimm5:$imm)), CC)), -+ (!cast(Inst#"_WU") LASX256:$xj, uimm5:$imm)>; -+ def : Pat<(v4i64 (setcc (v4i64 LASX256:$xj), -+ (v4i64 (SplatPat_uimm5 uimm5:$imm)), CC)), -+ (!cast(Inst#"_DU") LASX256:$xj, uimm5:$imm)>; -+} -+ -+multiclass PatCCXrXr { -+ def : Pat<(v32i8 (setcc (v32i8 LASX256:$xj), (v32i8 LASX256:$xk), CC)), -+ (!cast(Inst#"_B") LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(v16i16 (setcc (v16i16 LASX256:$xj), (v16i16 LASX256:$xk), CC)), -+ (!cast(Inst#"_H") LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(v8i32 (setcc (v8i32 LASX256:$xj), (v8i32 LASX256:$xk), CC)), -+ (!cast(Inst#"_W") LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(v4i64 (setcc (v4i64 LASX256:$xj), (v4i64 LASX256:$xk), CC)), -+ (!cast(Inst#"_D") LASX256:$xj, LASX256:$xk)>; -+} -+ -+multiclass PatCCXrXrU { -+ def : Pat<(v32i8 (setcc (v32i8 LASX256:$xj), (v32i8 LASX256:$xk), CC)), -+ (!cast(Inst#"_BU") LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(v16i16 (setcc (v16i16 LASX256:$xj), (v16i16 LASX256:$xk), CC)), -+ (!cast(Inst#"_HU") LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(v8i32 (setcc (v8i32 LASX256:$xj), (v8i32 LASX256:$xk), CC)), -+ (!cast(Inst#"_WU") LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(v4i64 (setcc (v4i64 LASX256:$xj), (v4i64 LASX256:$xk), CC)), -+ (!cast(Inst#"_DU") LASX256:$xj, LASX256:$xk)>; -+} -+ -+multiclass PatCCXrXrF { -+ def : Pat<(v8i32 (setcc (v8f32 LASX256:$xj), (v8f32 LASX256:$xk), CC)), -+ (!cast(Inst#"_S") LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(v4i64 (setcc (v4f64 LASX256:$xj), (v4f64 LASX256:$xk), CC)), -+ (!cast(Inst#"_D") LASX256:$xj, LASX256:$xk)>; -+} -+ - let Predicates = [HasExtLASX] in { - - // XVADD_{B/H/W/D} -@@ -1389,6 +1448,42 @@ def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa), - def : Pat<(fma v4f64:$xj, v4f64:$xk, v4f64:$xa), - (XVFMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; - -+// XVSEQ[I]_{B/H/W/D} -+defm : PatCCXrSimm5; -+defm : PatCCXrXr; -+ -+// XVSLE[I]_{B/H/W/D}[U] -+defm : PatCCXrSimm5; -+defm : PatCCXrUimm5; -+defm : PatCCXrXr; -+defm : PatCCXrXrU; -+ -+// XVSLT[I]_{B/H/W/D}[U] -+defm : PatCCXrSimm5; -+defm : PatCCXrUimm5; -+defm : PatCCXrXr; -+defm : PatCCXrXrU; -+ -+// XVFCMP.cond.{S/D} -+defm : PatCCXrXrF; -+defm : PatCCXrXrF; -+defm : PatCCXrXrF; -+ -+defm : PatCCXrXrF; -+defm : PatCCXrXrF; -+defm : PatCCXrXrF; -+ -+defm : PatCCXrXrF; -+defm : PatCCXrXrF; -+defm : PatCCXrXrF; -+ -+defm : PatCCXrXrF; -+defm : PatCCXrXrF; -+defm : PatCCXrXrF; -+ -+defm 
: PatCCXrXrF; -+defm : PatCCXrXrF; -+ - // PseudoXVINSGR2VR_{B/H} - def : Pat<(vector_insert v32i8:$xd, GRLenVT:$rj, uimm5:$imm), - (PseudoXVINSGR2VR_B v32i8:$xd, GRLenVT:$rj, uimm5:$imm)>; -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index 5800ff6f6266..ff21c6681271 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -1261,6 +1261,65 @@ multiclass PatShiftVrUimm { - (!cast(Inst#"_D") LSX128:$vj, uimm6:$imm)>; - } - -+multiclass PatCCVrSimm5 { -+ def : Pat<(v16i8 (setcc (v16i8 LSX128:$vj), -+ (v16i8 (SplatPat_simm5 simm5:$imm)), CC)), -+ (!cast(Inst#"_B") LSX128:$vj, simm5:$imm)>; -+ def : Pat<(v8i16 (setcc (v8i16 LSX128:$vj), -+ (v8i16 (SplatPat_simm5 simm5:$imm)), CC)), -+ (!cast(Inst#"_H") LSX128:$vj, simm5:$imm)>; -+ def : Pat<(v4i32 (setcc (v4i32 LSX128:$vj), -+ (v4i32 (SplatPat_simm5 simm5:$imm)), CC)), -+ (!cast(Inst#"_W") LSX128:$vj, simm5:$imm)>; -+ def : Pat<(v2i64 (setcc (v2i64 LSX128:$vj), -+ (v2i64 (SplatPat_simm5 simm5:$imm)), CC)), -+ (!cast(Inst#"_D") LSX128:$vj, simm5:$imm)>; -+} -+ -+multiclass PatCCVrUimm5 { -+ def : Pat<(v16i8 (setcc (v16i8 LSX128:$vj), -+ (v16i8 (SplatPat_uimm5 uimm5:$imm)), CC)), -+ (!cast(Inst#"_BU") LSX128:$vj, uimm5:$imm)>; -+ def : Pat<(v8i16 (setcc (v8i16 LSX128:$vj), -+ (v8i16 (SplatPat_uimm5 uimm5:$imm)), CC)), -+ (!cast(Inst#"_HU") LSX128:$vj, uimm5:$imm)>; -+ def : Pat<(v4i32 (setcc (v4i32 LSX128:$vj), -+ (v4i32 (SplatPat_uimm5 uimm5:$imm)), CC)), -+ (!cast(Inst#"_WU") LSX128:$vj, uimm5:$imm)>; -+ def : Pat<(v2i64 (setcc (v2i64 LSX128:$vj), -+ (v2i64 (SplatPat_uimm5 uimm5:$imm)), CC)), -+ (!cast(Inst#"_DU") LSX128:$vj, uimm5:$imm)>; -+} -+ -+multiclass PatCCVrVr { -+ def : Pat<(v16i8 (setcc (v16i8 LSX128:$vj), (v16i8 LSX128:$vk), CC)), -+ (!cast(Inst#"_B") LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(v8i16 (setcc (v8i16 LSX128:$vj), (v8i16 LSX128:$vk), CC)), -+ (!cast(Inst#"_H") LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(v4i32 (setcc (v4i32 LSX128:$vj), (v4i32 LSX128:$vk), CC)), -+ (!cast(Inst#"_W") LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(v2i64 (setcc (v2i64 LSX128:$vj), (v2i64 LSX128:$vk), CC)), -+ (!cast(Inst#"_D") LSX128:$vj, LSX128:$vk)>; -+} -+ -+multiclass PatCCVrVrU { -+ def : Pat<(v16i8 (setcc (v16i8 LSX128:$vj), (v16i8 LSX128:$vk), CC)), -+ (!cast(Inst#"_BU") LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(v8i16 (setcc (v8i16 LSX128:$vj), (v8i16 LSX128:$vk), CC)), -+ (!cast(Inst#"_HU") LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(v4i32 (setcc (v4i32 LSX128:$vj), (v4i32 LSX128:$vk), CC)), -+ (!cast(Inst#"_WU") LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(v2i64 (setcc (v2i64 LSX128:$vj), (v2i64 LSX128:$vk), CC)), -+ (!cast(Inst#"_DU") LSX128:$vj, LSX128:$vk)>; -+} -+ -+multiclass PatCCVrVrF { -+ def : Pat<(v4i32 (setcc (v4f32 LSX128:$vj), (v4f32 LSX128:$vk), CC)), -+ (!cast(Inst#"_S") LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(v2i64 (setcc (v2f64 LSX128:$vj), (v2f64 LSX128:$vk), CC)), -+ (!cast(Inst#"_D") LSX128:$vj, LSX128:$vk)>; -+} -+ - let Predicates = [HasExtLSX] in { - - // VADD_{B/H/W/D} -@@ -1466,6 +1525,42 @@ def : Pat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va), - def : Pat<(fma v2f64:$vj, v2f64:$vk, v2f64:$va), - (VFMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; - -+// VSEQ[I]_{B/H/W/D} -+defm : PatCCVrSimm5; -+defm : PatCCVrVr; -+ -+// VSLE[I]_{B/H/W/D}[U] -+defm : PatCCVrSimm5; -+defm : PatCCVrUimm5; -+defm : PatCCVrVr; -+defm : PatCCVrVrU; -+ -+// VSLT[I]_{B/H/W/D}[U] -+defm : PatCCVrSimm5; -+defm : PatCCVrUimm5; 
-+defm : PatCCVrVr; -+defm : PatCCVrVrU; -+ -+// VFCMP.cond.{S/D} -+defm : PatCCVrVrF; -+defm : PatCCVrVrF; -+defm : PatCCVrVrF; -+ -+defm : PatCCVrVrF; -+defm : PatCCVrVrF; -+defm : PatCCVrVrF; -+ -+defm : PatCCVrVrF; -+defm : PatCCVrVrF; -+defm : PatCCVrVrF; -+ -+defm : PatCCVrVrF; -+defm : PatCCVrVrF; -+defm : PatCCVrVrF; -+ -+defm : PatCCVrVrF; -+defm : PatCCVrVrF; -+ - // VINSGR2VR_{B/H/W/D} - def : Pat<(vector_insert v16i8:$vd, GRLenVT:$rj, uimm4:$imm), - (VINSGR2VR_B v16i8:$vd, GRLenVT:$rj, uimm4:$imm)>; -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll -new file mode 100644 -index 000000000000..ef67dbc100c0 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll -@@ -0,0 +1,692 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+;; TREU -+define void @v8f32_fcmp_true(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_true: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvrepli.b $xr0, -1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp true <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+;; FALSE -+define void @v4f64_fcmp_false(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_false: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvrepli.b $xr0, 0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp false <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETOEQ -+define void @v8f32_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_oeq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.ceq.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp oeq <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_oeq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.ceq.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp oeq <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETUEQ -+define void @v8f32_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_ueq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cueq.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp ueq <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_ueq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cueq.d $xr0, 
$xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp ueq <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETEQ -+define void @v8f32_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_eq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.ceq.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp fast oeq <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_eq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.ceq.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp fast ueq <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETOLE -+define void @v8f32_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_ole: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cle.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp ole <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_ole: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cle.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp ole <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETULE -+define void @v8f32_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_ule: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cule.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp ule <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_ule: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cule.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp ule <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETLE -+define void @v8f32_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_le: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cle.s $xr0, $xr1, $xr0 -+; 
CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp fast ole <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_le: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cle.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp fast ule <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETOLT -+define void @v8f32_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_olt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp olt <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_olt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp olt <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETULT -+define void @v8f32_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_ult: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cult.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp ult <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_ult: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cult.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp ult <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETLT -+define void @v8f32_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_lt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp fast olt <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_lt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; 
CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp fast ult <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETONE -+define void @v8f32_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_one: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cne.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp one <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_one: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cne.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp one <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETUNE -+define void @v8f32_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_une: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cune.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp une <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_une: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cune.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp une <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETNE -+define void @v8f32_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_ne: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cne.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp fast one <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_ne: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cne.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp fast une <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETO -+define void @v8f32_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_ord: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cor.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ 
%v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp ord <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_ord: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cor.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp ord <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETUO -+define void @v8f32_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_uno: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cun.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp uno <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_uno: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cun.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp uno <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETOGT -+define void @v8f32_fcmp_ogt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_ogt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp ogt <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_ogt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_ogt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp ogt <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETUGT -+define void @v8f32_fcmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_ugt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvfcmp.cult.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp ugt <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_ugt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvfcmp.cult.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr 
%a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp ugt <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETGT -+define void @v8f32_fcmp_gt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_gt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp fast ogt <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_gt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_gt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp fast ugt <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETOGE -+define void @v8f32_fcmp_oge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_oge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvfcmp.cle.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp oge <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_oge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_oge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvfcmp.cle.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp oge <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETUGE -+define void @v8f32_fcmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_uge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvfcmp.cule.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp uge <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_uge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvfcmp.cule.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp uge <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETGE -+define void @v8f32_fcmp_ge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_ge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvfcmp.cle.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, 
ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp fast oge <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_ge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_ge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvfcmp.cle.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp fast uge <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll -new file mode 100644 -index 000000000000..6693fe0f6ec7 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll -@@ -0,0 +1,939 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+;; SETEQ -+define void @v32i8_icmp_eq_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v32i8_icmp_eq_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvseqi.b $xr0, $xr0, 15 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <32 x i8>, ptr %a0 -+ %cmp = icmp eq <32 x i8> %v0, -+ %ext = sext <32 x i1> %cmp to <32 x i8> -+ store <32 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v32i8_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v32i8_icmp_eq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvseq.b $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %cmp = icmp eq <32 x i8> %v0, %v1 -+ %ext = sext <32 x i1> %cmp to <32 x i8> -+ store <32 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i16_icmp_eq_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v16i16_icmp_eq_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvseqi.h $xr0, $xr0, 15 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i16>, ptr %a0 -+ %cmp = icmp eq <16 x i16> %v0, -+ %ext = sext <16 x i1> %cmp to <16 x i16> -+ store <16 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i16_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i16_icmp_eq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvseq.h $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %cmp = icmp eq <16 x i16> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i16> -+ store <16 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i32_icmp_eq_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v8i32_icmp_eq_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvseqi.w $xr0, $xr0, 15 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %a0 -+ %cmp = icmp eq <8 x i32> %v0, -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i32_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i32_icmp_eq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvseq.w $xr0, $xr1, 
$xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %cmp = icmp eq <8 x i32> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i64_icmp_eq_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v4i64_icmp_eq_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvseqi.d $xr0, $xr0, 15 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %a0 -+ %cmp = icmp eq <4 x i64> %v0, -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i64_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i64_icmp_eq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvseq.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %cmp = icmp eq <4 x i64> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETLE -+define void @v32i8_icmp_sle_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v32i8_icmp_sle_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslei.b $xr0, $xr0, 15 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <32 x i8>, ptr %a0 -+ %cmp = icmp sle <32 x i8> %v0, -+ %ext = sext <32 x i1> %cmp to <32 x i8> -+ store <32 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v32i8_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v32i8_icmp_sle: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsle.b $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %cmp = icmp sle <32 x i8> %v0, %v1 -+ %ext = sext <32 x i1> %cmp to <32 x i8> -+ store <32 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i16_icmp_sle_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v16i16_icmp_sle_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslei.h $xr0, $xr0, 15 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i16>, ptr %a0 -+ %cmp = icmp sle <16 x i16> %v0, -+ %ext = sext <16 x i1> %cmp to <16 x i16> -+ store <16 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i16_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i16_icmp_sle: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsle.h $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %cmp = icmp sle <16 x i16> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i16> -+ store <16 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i32_icmp_sle_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v8i32_icmp_sle_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslei.w $xr0, $xr0, 15 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %a0 -+ %cmp = icmp sle <8 x i32> %v0, -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i32_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i32_icmp_sle: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld 
$xr1, $a1, 0 -+; CHECK-NEXT: xvsle.w $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %cmp = icmp sle <8 x i32> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i64_icmp_sle_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v4i64_icmp_sle_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslei.d $xr0, $xr0, 15 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %a0 -+ %cmp = icmp sle <4 x i64> %v0, -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i64_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i64_icmp_sle: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsle.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %cmp = icmp sle <4 x i64> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETULE -+define void @v32i8_icmp_ule_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v32i8_icmp_ule_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslei.bu $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <32 x i8>, ptr %a0 -+ %cmp = icmp ule <32 x i8> %v0, -+ %ext = sext <32 x i1> %cmp to <32 x i8> -+ store <32 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v32i8_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v32i8_icmp_ule: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsle.bu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %cmp = icmp ule <32 x i8> %v0, %v1 -+ %ext = sext <32 x i1> %cmp to <32 x i8> -+ store <32 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i16_icmp_ule_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v16i16_icmp_ule_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslei.hu $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i16>, ptr %a0 -+ %cmp = icmp ule <16 x i16> %v0, -+ %ext = sext <16 x i1> %cmp to <16 x i16> -+ store <16 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i16_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i16_icmp_ule: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsle.hu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %cmp = icmp ule <16 x i16> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i16> -+ store <16 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i32_icmp_ule_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v8i32_icmp_ule_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslei.wu $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %a0 -+ %cmp = icmp ule <8 x i32> %v0, -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i32_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i32_icmp_ule: -+; CHECK: # 
%bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsle.wu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %cmp = icmp ule <8 x i32> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i64_icmp_ule_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v4i64_icmp_ule_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslei.du $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %a0 -+ %cmp = icmp ule <4 x i64> %v0, -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i64_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i64_icmp_ule: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsle.du $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %cmp = icmp ule <4 x i64> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETLT -+define void @v32i8_icmp_slt_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v32i8_icmp_slt_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslti.b $xr0, $xr0, 15 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <32 x i8>, ptr %a0 -+ %cmp = icmp slt <32 x i8> %v0, -+ %ext = sext <32 x i1> %cmp to <32 x i8> -+ store <32 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v32i8_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v32i8_icmp_slt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvslt.b $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %cmp = icmp slt <32 x i8> %v0, %v1 -+ %ext = sext <32 x i1> %cmp to <32 x i8> -+ store <32 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i16_icmp_slt_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v16i16_icmp_slt_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslti.h $xr0, $xr0, 15 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i16>, ptr %a0 -+ %cmp = icmp slt <16 x i16> %v0, -+ %ext = sext <16 x i1> %cmp to <16 x i16> -+ store <16 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i16_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i16_icmp_slt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvslt.h $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %cmp = icmp slt <16 x i16> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i16> -+ store <16 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i32_icmp_slt_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v8i32_icmp_slt_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslti.w $xr0, $xr0, 15 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %a0 -+ %cmp = icmp slt <8 x i32> %v0, -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i32_icmp_slt(ptr %res, ptr %a0, ptr %a1) 
nounwind { -+; CHECK-LABEL: v8i32_icmp_slt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvslt.w $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %cmp = icmp slt <8 x i32> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i64_icmp_slt_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v4i64_icmp_slt_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslti.d $xr0, $xr0, 15 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %a0 -+ %cmp = icmp slt <4 x i64> %v0, -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i64_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i64_icmp_slt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvslt.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %cmp = icmp slt <4 x i64> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETULT -+define void @v32i8_icmp_ult_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v32i8_icmp_ult_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslti.bu $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <32 x i8>, ptr %a0 -+ %cmp = icmp ult <32 x i8> %v0, -+ %ext = sext <32 x i1> %cmp to <32 x i8> -+ store <32 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v32i8_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v32i8_icmp_ult: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvslt.bu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %cmp = icmp ult <32 x i8> %v0, %v1 -+ %ext = sext <32 x i1> %cmp to <32 x i8> -+ store <32 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i16_icmp_ult_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v16i16_icmp_ult_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslti.hu $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i16>, ptr %a0 -+ %cmp = icmp ult <16 x i16> %v0, -+ %ext = sext <16 x i1> %cmp to <16 x i16> -+ store <16 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i16_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i16_icmp_ult: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvslt.hu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %cmp = icmp ult <16 x i16> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i16> -+ store <16 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i32_icmp_ult_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v8i32_icmp_ult_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslti.wu $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %a0 -+ %cmp = icmp ult <8 x i32> %v0, -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} 
-+ -+define void @v8i32_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i32_icmp_ult: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvslt.wu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %cmp = icmp ult <8 x i32> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i64_icmp_ult_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v4i64_icmp_ult_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslti.du $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %a0 -+ %cmp = icmp ult <4 x i64> %v0, -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i64_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i64_icmp_ult: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvslt.du $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %cmp = icmp ult <4 x i64> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETNE -+define void @v32i8_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v32i8_icmp_ne: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvseq.b $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvxori.b $xr0, $xr0, 255 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %cmp = icmp ne <32 x i8> %v0, %v1 -+ %ext = sext <32 x i1> %cmp to <32 x i8> -+ store <32 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i16_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i16_icmp_ne: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvseq.h $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvrepli.b $xr1, -1 -+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %cmp = icmp ne <16 x i16> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i16> -+ store <16 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i32_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i32_icmp_ne: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvseq.w $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvrepli.b $xr1, -1 -+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %cmp = icmp ne <8 x i32> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i64_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i64_icmp_ne: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvseq.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvrepli.b $xr1, -1 -+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %cmp = icmp ne <4 x i64> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ 
store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETGE -+define void @v32i8_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v32i8_icmp_sge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvsle.b $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %cmp = icmp sge <32 x i8> %v0, %v1 -+ %ext = sext <32 x i1> %cmp to <32 x i8> -+ store <32 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i16_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i16_icmp_sge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvsle.h $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %cmp = icmp sge <16 x i16> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i16> -+ store <16 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i32_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i32_icmp_sge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvsle.w $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %cmp = icmp sge <8 x i32> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i64_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i64_icmp_sge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvsle.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %cmp = icmp sge <4 x i64> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETUGE -+define void @v32i8_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v32i8_icmp_uge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvsle.bu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %cmp = icmp uge <32 x i8> %v0, %v1 -+ %ext = sext <32 x i1> %cmp to <32 x i8> -+ store <32 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i16_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i16_icmp_uge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvsle.hu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %cmp = icmp uge <16 x i16> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i16> -+ store <16 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i32_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i32_icmp_uge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvsle.wu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %cmp = icmp uge <8 x i32> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i64_icmp_uge(ptr %res, ptr %a0, ptr %a1) 
nounwind { -+; CHECK-LABEL: v4i64_icmp_uge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvsle.du $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %cmp = icmp uge <4 x i64> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETGT -+define void @v32i8_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v32i8_icmp_sgt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvslt.b $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %cmp = icmp sgt <32 x i8> %v0, %v1 -+ %ext = sext <32 x i1> %cmp to <32 x i8> -+ store <32 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i16_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i16_icmp_sgt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvslt.h $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %cmp = icmp sgt <16 x i16> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i16> -+ store <16 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i32_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i32_icmp_sgt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvslt.w $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %cmp = icmp sgt <8 x i32> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i64_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i64_icmp_sgt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvslt.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %cmp = icmp sgt <4 x i64> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETUGT -+define void @v32i8_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v32i8_icmp_ugt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvslt.bu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %cmp = icmp ugt <32 x i8> %v0, %v1 -+ %ext = sext <32 x i1> %cmp to <32 x i8> -+ store <32 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i16_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i16_icmp_ugt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvslt.hu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %cmp = icmp ugt <16 x i16> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i16> -+ store <16 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i32_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i32_icmp_ugt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; 
CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvslt.wu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %cmp = icmp ugt <8 x i32> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i64_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i64_icmp_ugt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvslt.du $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %cmp = icmp ugt <4 x i64> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll -new file mode 100644 -index 000000000000..53fbf0b2f86f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll -@@ -0,0 +1,692 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+;; TREU -+define void @v4f32_fcmp_true(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_true: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vrepli.b $vr0, -1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp true <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+;; FALSE -+define void @v2f64_fcmp_false(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_false: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vrepli.b $vr0, 0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp false <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETOEQ -+define void @v4f32_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_oeq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.ceq.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp oeq <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_oeq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.ceq.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp oeq <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETUEQ -+define void @v4f32_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_ueq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cueq.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp ueq <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store 
<4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_ueq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cueq.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp ueq <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETEQ -+define void @v4f32_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_eq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.ceq.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp fast oeq <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_eq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.ceq.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp fast ueq <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETOLE -+define void @v4f32_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_ole: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cle.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp ole <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_ole: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cle.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp ole <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETULE -+define void @v4f32_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_ule: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cule.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp ule <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_ule: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cule.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp ule <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETLE 
-+define void @v4f32_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_le: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cle.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp fast ole <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_le: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cle.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp fast ule <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETOLT -+define void @v4f32_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_olt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.clt.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp olt <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_olt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.clt.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp olt <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETULT -+define void @v4f32_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_ult: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cult.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp ult <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_ult: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cult.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp ult <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETLT -+define void @v4f32_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_lt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.clt.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp fast olt <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_lt(ptr %res, ptr %a0, ptr %a1) 
nounwind { -+; CHECK-LABEL: v2f64_fcmp_lt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.clt.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp fast ult <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETONE -+define void @v4f32_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_one: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cne.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp one <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_one: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cne.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp one <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETUNE -+define void @v4f32_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_une: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cune.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp une <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_une: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cune.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp une <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETNE -+define void @v4f32_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_ne: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cne.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp fast one <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_ne: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cne.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp fast une <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETO -+define void @v4f32_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_ord: -+; 
CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cor.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp ord <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_ord: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cor.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp ord <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETUO -+define void @v4f32_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_uno: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cun.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp uno <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_uno: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cun.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp uno <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETOGT -+define void @v4f32_fcmp_ogt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_ogt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vfcmp.clt.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp ogt <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_ogt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_ogt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vfcmp.clt.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp ogt <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETUGT -+define void @v4f32_fcmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_ugt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vfcmp.cult.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp ugt <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_ugt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; 
CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vfcmp.cult.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp ugt <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETGT -+define void @v4f32_fcmp_gt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_gt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vfcmp.clt.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp fast ogt <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_gt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_gt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vfcmp.clt.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp fast ugt <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETOGE -+define void @v4f32_fcmp_oge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_oge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vfcmp.cle.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp oge <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_oge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_oge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vfcmp.cle.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp oge <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETUGE -+define void @v4f32_fcmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_uge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vfcmp.cule.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp uge <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_uge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vfcmp.cule.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp uge <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETGE -+define void @v4f32_fcmp_ge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_ge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, 
$a2, 0 -+; CHECK-NEXT: vfcmp.cle.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp fast oge <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_ge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_ge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vfcmp.cle.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp fast uge <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll -new file mode 100644 -index 000000000000..448f3fa6c6e0 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll -@@ -0,0 +1,939 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+;; SETEQ -+define void @v16i8_icmp_eq_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v16i8_icmp_eq_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vseqi.b $vr0, $vr0, 15 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i8>, ptr %a0 -+ %cmp = icmp eq <16 x i8> %v0, -+ %ext = sext <16 x i1> %cmp to <16 x i8> -+ store <16 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i8_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i8_icmp_eq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vseq.b $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %cmp = icmp eq <16 x i8> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i8> -+ store <16 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i16_icmp_eq_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v8i16_icmp_eq_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vseqi.h $vr0, $vr0, 15 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i16>, ptr %a0 -+ %cmp = icmp eq <8 x i16> %v0, -+ %ext = sext <8 x i1> %cmp to <8 x i16> -+ store <8 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i16_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i16_icmp_eq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vseq.h $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %cmp = icmp eq <8 x i16> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i16> -+ store <8 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i32_icmp_eq_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v4i32_icmp_eq_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vseqi.w $vr0, $vr0, 15 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %a0 -+ %cmp = icmp eq <4 x i32> %v0, -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i32_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i32_icmp_eq: -+; CHECK: # %bb.0: -+; 
CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vseq.w $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %cmp = icmp eq <4 x i32> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2i64_icmp_eq_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v2i64_icmp_eq_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vseqi.d $vr0, $vr0, 15 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %a0 -+ %cmp = icmp eq <2 x i64> %v0, -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+define void @v2i64_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2i64_icmp_eq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vseq.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %cmp = icmp eq <2 x i64> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETLE -+define void @v16i8_icmp_sle_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v16i8_icmp_sle_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslei.b $vr0, $vr0, 15 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i8>, ptr %a0 -+ %cmp = icmp sle <16 x i8> %v0, -+ %ext = sext <16 x i1> %cmp to <16 x i8> -+ store <16 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i8_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i8_icmp_sle: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsle.b $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %cmp = icmp sle <16 x i8> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i8> -+ store <16 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i16_icmp_sle_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v8i16_icmp_sle_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslei.h $vr0, $vr0, 15 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i16>, ptr %a0 -+ %cmp = icmp sle <8 x i16> %v0, -+ %ext = sext <8 x i1> %cmp to <8 x i16> -+ store <8 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i16_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i16_icmp_sle: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsle.h $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %cmp = icmp sle <8 x i16> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i16> -+ store <8 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i32_icmp_sle_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v4i32_icmp_sle_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslei.w $vr0, $vr0, 15 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %a0 -+ %cmp = icmp sle <4 x i32> %v0, -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i32_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i32_icmp_sle: -+; CHECK: # %bb.0: 
-+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsle.w $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %cmp = icmp sle <4 x i32> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2i64_icmp_sle_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v2i64_icmp_sle_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslei.d $vr0, $vr0, 15 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %a0 -+ %cmp = icmp sle <2 x i64> %v0, -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+define void @v2i64_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2i64_icmp_sle: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsle.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %cmp = icmp sle <2 x i64> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETULE -+define void @v16i8_icmp_ule_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v16i8_icmp_ule_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslei.bu $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i8>, ptr %a0 -+ %cmp = icmp ule <16 x i8> %v0, -+ %ext = sext <16 x i1> %cmp to <16 x i8> -+ store <16 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i8_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i8_icmp_ule: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsle.bu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %cmp = icmp ule <16 x i8> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i8> -+ store <16 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i16_icmp_ule_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v8i16_icmp_ule_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslei.hu $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i16>, ptr %a0 -+ %cmp = icmp ule <8 x i16> %v0, -+ %ext = sext <8 x i1> %cmp to <8 x i16> -+ store <8 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i16_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i16_icmp_ule: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsle.hu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %cmp = icmp ule <8 x i16> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i16> -+ store <8 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i32_icmp_ule_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v4i32_icmp_ule_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslei.wu $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %a0 -+ %cmp = icmp ule <4 x i32> %v0, -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i32_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i32_icmp_ule: -+; 
CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsle.wu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %cmp = icmp ule <4 x i32> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2i64_icmp_ule_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v2i64_icmp_ule_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslei.du $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %a0 -+ %cmp = icmp ule <2 x i64> %v0, -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+define void @v2i64_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2i64_icmp_ule: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsle.du $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %cmp = icmp ule <2 x i64> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETLT -+define void @v16i8_icmp_slt_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v16i8_icmp_slt_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslti.b $vr0, $vr0, 15 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i8>, ptr %a0 -+ %cmp = icmp slt <16 x i8> %v0, -+ %ext = sext <16 x i1> %cmp to <16 x i8> -+ store <16 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i8_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i8_icmp_slt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vslt.b $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %cmp = icmp slt <16 x i8> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i8> -+ store <16 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i16_icmp_slt_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v8i16_icmp_slt_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslti.h $vr0, $vr0, 15 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i16>, ptr %a0 -+ %cmp = icmp slt <8 x i16> %v0, -+ %ext = sext <8 x i1> %cmp to <8 x i16> -+ store <8 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i16_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i16_icmp_slt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vslt.h $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %cmp = icmp slt <8 x i16> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i16> -+ store <8 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i32_icmp_slt_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v4i32_icmp_slt_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslti.w $vr0, $vr0, 15 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %a0 -+ %cmp = icmp slt <4 x i32> %v0, -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i32_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: 
v4i32_icmp_slt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vslt.w $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %cmp = icmp slt <4 x i32> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2i64_icmp_slt_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v2i64_icmp_slt_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslti.d $vr0, $vr0, 15 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %a0 -+ %cmp = icmp slt <2 x i64> %v0, -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+define void @v2i64_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2i64_icmp_slt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vslt.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %cmp = icmp slt <2 x i64> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETULT -+define void @v16i8_icmp_ult_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v16i8_icmp_ult_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslti.bu $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i8>, ptr %a0 -+ %cmp = icmp ult <16 x i8> %v0, -+ %ext = sext <16 x i1> %cmp to <16 x i8> -+ store <16 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i8_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i8_icmp_ult: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vslt.bu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %cmp = icmp ult <16 x i8> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i8> -+ store <16 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i16_icmp_ult_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v8i16_icmp_ult_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslti.hu $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i16>, ptr %a0 -+ %cmp = icmp ult <8 x i16> %v0, -+ %ext = sext <8 x i1> %cmp to <8 x i16> -+ store <8 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i16_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i16_icmp_ult: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vslt.hu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %cmp = icmp ult <8 x i16> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i16> -+ store <8 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i32_icmp_ult_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v4i32_icmp_ult_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslti.wu $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %a0 -+ %cmp = icmp ult <4 x i32> %v0, -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i32_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { 
-+; CHECK-LABEL: v4i32_icmp_ult: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vslt.wu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %cmp = icmp ult <4 x i32> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2i64_icmp_ult_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v2i64_icmp_ult_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslti.du $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %a0 -+ %cmp = icmp ult <2 x i64> %v0, -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+define void @v2i64_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2i64_icmp_ult: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vslt.du $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %cmp = icmp ult <2 x i64> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETNE -+define void @v16i8_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i8_icmp_ne: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vseq.b $vr0, $vr1, $vr0 -+; CHECK-NEXT: vxori.b $vr0, $vr0, 255 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %cmp = icmp ne <16 x i8> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i8> -+ store <16 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i16_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i16_icmp_ne: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vseq.h $vr0, $vr1, $vr0 -+; CHECK-NEXT: vrepli.b $vr1, -1 -+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %cmp = icmp ne <8 x i16> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i16> -+ store <8 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i32_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i32_icmp_ne: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vseq.w $vr0, $vr1, $vr0 -+; CHECK-NEXT: vrepli.b $vr1, -1 -+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %cmp = icmp ne <4 x i32> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2i64_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2i64_icmp_ne: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vseq.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vrepli.b $vr1, -1 -+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %cmp = icmp ne <2 x i64> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETGE -+define void @v16i8_icmp_sge(ptr %res, ptr 
%a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i8_icmp_sge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vsle.b $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %cmp = icmp sge <16 x i8> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i8> -+ store <16 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i16_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i16_icmp_sge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vsle.h $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %cmp = icmp sge <8 x i16> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i16> -+ store <8 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i32_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i32_icmp_sge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vsle.w $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %cmp = icmp sge <4 x i32> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2i64_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2i64_icmp_sge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vsle.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %cmp = icmp sge <2 x i64> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETUGE -+define void @v16i8_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i8_icmp_uge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vsle.bu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %cmp = icmp uge <16 x i8> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i8> -+ store <16 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i16_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i16_icmp_uge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vsle.hu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %cmp = icmp uge <8 x i16> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i16> -+ store <8 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i32_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i32_icmp_uge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vsle.wu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %cmp = icmp uge <4 x i32> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2i64_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2i64_icmp_uge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vsle.du 
$vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %cmp = icmp uge <2 x i64> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETGT -+define void @v16i8_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i8_icmp_sgt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vslt.b $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %cmp = icmp sgt <16 x i8> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i8> -+ store <16 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i16_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i16_icmp_sgt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vslt.h $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %cmp = icmp sgt <8 x i16> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i16> -+ store <8 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i32_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i32_icmp_sgt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vslt.w $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %cmp = icmp sgt <4 x i32> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2i64_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2i64_icmp_sgt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vslt.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %cmp = icmp sgt <2 x i64> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETUGT -+define void @v16i8_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i8_icmp_ugt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vslt.bu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %cmp = icmp ugt <16 x i8> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i8> -+ store <16 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i16_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i16_icmp_ugt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vslt.hu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %cmp = icmp ugt <8 x i16> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i16> -+ store <8 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i32_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i32_icmp_ugt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vslt.wu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %cmp = icmp 
ugt <4 x i32> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2i64_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2i64_icmp_ugt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vslt.du $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %cmp = icmp ugt <2 x i64> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} --- -2.20.1 - - -From 49444f4fbca6681e0fd404a19b562ccfcc140879 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Fri, 8 Dec 2023 14:16:26 +0800 -Subject: [PATCH 24/35] [LoongArch] Make ISD::FSQRT a legal operation with - lsx/lasx feature (#74795) - -And add some patterns: -1. (fdiv 1.0, vector) -2. (fdiv 1.0, (fsqrt vector)) - -(cherry picked from commit 9f70e708a7d3fce97d63b626520351501455fca0) ---- - .../LoongArch/LoongArchISelLowering.cpp | 2 + - .../LoongArch/LoongArchLASXInstrInfo.td | 22 +++++++ - .../Target/LoongArch/LoongArchLSXInstrInfo.td | 45 +++++++++++++ - llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll | 65 +++++++++++++++++++ - .../LoongArch/lasx/ir-instruction/fdiv.ll | 29 +++++++++ - llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll | 65 +++++++++++++++++++ - .../LoongArch/lsx/ir-instruction/fdiv.ll | 29 +++++++++ - 7 files changed, 257 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index 3d5ae6d3deda..8c54c7cf2cab 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -260,6 +260,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); - setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); - setOperationAction(ISD::FMA, VT, Legal); -+ setOperationAction(ISD::FSQRT, VT, Legal); - setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, - ISD::SETUGE, ISD::SETUGT}, - VT, Expand); -@@ -300,6 +301,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); - setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); - setOperationAction(ISD::FMA, VT, Legal); -+ setOperationAction(ISD::FSQRT, VT, Legal); - setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, - ISD::SETUGE, ISD::SETUGT}, - VT, Expand); -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index a9bf65c6840d..55b90f4450c0 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -1092,6 +1092,13 @@ multiclass PatXr { - (!cast(Inst#"_D") LASX256:$xj)>; - } - -+multiclass PatXrF { -+ def : Pat<(v8f32 (OpNode (v8f32 LASX256:$xj))), -+ (!cast(Inst#"_S") LASX256:$xj)>; -+ def : Pat<(v4f64 (OpNode (v4f64 LASX256:$xj))), -+ (!cast(Inst#"_D") LASX256:$xj)>; -+} -+ - multiclass PatXrXr { - def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), - (!cast(Inst#"_B") LASX256:$xj, LASX256:$xk)>; -@@ -1448,6 +1455,21 @@ def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa), - def : Pat<(fma v4f64:$xj, v4f64:$xk, v4f64:$xa), - (XVFMADD_D v4f64:$xj, v4f64:$xk, 
v4f64:$xa)>; - -+// XVFSQRT_{S/D} -+defm : PatXrF; -+ -+// XVRECIP_{S/D} -+def : Pat<(fdiv vsplatf32_fpimm_eq_1, v8f32:$xj), -+ (XVFRECIP_S v8f32:$xj)>; -+def : Pat<(fdiv vsplatf64_fpimm_eq_1, v4f64:$xj), -+ (XVFRECIP_D v4f64:$xj)>; -+ -+// XVFRSQRT_{S/D} -+def : Pat<(fdiv vsplatf32_fpimm_eq_1, (fsqrt v8f32:$xj)), -+ (XVFRSQRT_S v8f32:$xj)>; -+def : Pat<(fdiv vsplatf64_fpimm_eq_1, (fsqrt v4f64:$xj)), -+ (XVFRSQRT_D v4f64:$xj)>; -+ - // XVSEQ[I]_{B/H/W/D} - defm : PatCCXrSimm5; - defm : PatCCXrXr; -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index ff21c6681271..8ad0c5904f25 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -95,6 +95,29 @@ def vsplati64_imm_eq_63 : PatFrags<(ops), [(build_vector), - Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 63; - }]>; - -+def vsplatf32_fpimm_eq_1 -+ : PatFrags<(ops), [(bitconvert (v4i32 (build_vector))), -+ (bitconvert (v8i32 (build_vector)))], [{ -+ APInt Imm; -+ EVT EltTy = N->getValueType(0).getVectorElementType(); -+ N = N->getOperand(0).getNode(); -+ -+ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && -+ Imm.getBitWidth() == EltTy.getSizeInBits() && -+ Imm == APFloat(+1.0f).bitcastToAPInt(); -+}]>; -+def vsplatf64_fpimm_eq_1 -+ : PatFrags<(ops), [(bitconvert (v2i64 (build_vector))), -+ (bitconvert (v4i64 (build_vector)))], [{ -+ APInt Imm; -+ EVT EltTy = N->getValueType(0).getVectorElementType(); -+ N = N->getOperand(0).getNode(); -+ -+ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && -+ Imm.getBitWidth() == EltTy.getSizeInBits() && -+ Imm == APFloat(+1.0).bitcastToAPInt(); -+}]>; -+ - def vsplati8imm7 : PatFrag<(ops node:$reg), - (and node:$reg, vsplati8_imm_eq_7)>; - def vsplati16imm15 : PatFrag<(ops node:$reg), -@@ -1173,6 +1196,13 @@ multiclass PatVr { - (!cast(Inst#"_D") LSX128:$vj)>; - } - -+multiclass PatVrF { -+ def : Pat<(v4f32 (OpNode (v4f32 LSX128:$vj))), -+ (!cast(Inst#"_S") LSX128:$vj)>; -+ def : Pat<(v2f64 (OpNode (v2f64 LSX128:$vj))), -+ (!cast(Inst#"_D") LSX128:$vj)>; -+} -+ - multiclass PatVrVr { - def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), - (!cast(Inst#"_B") LSX128:$vj, LSX128:$vk)>; -@@ -1525,6 +1555,21 @@ def : Pat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va), - def : Pat<(fma v2f64:$vj, v2f64:$vk, v2f64:$va), - (VFMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; - -+// VFSQRT_{S/D} -+defm : PatVrF; -+ -+// VFRECIP_{S/D} -+def : Pat<(fdiv vsplatf32_fpimm_eq_1, v4f32:$vj), -+ (VFRECIP_S v4f32:$vj)>; -+def : Pat<(fdiv vsplatf64_fpimm_eq_1, v2f64:$vj), -+ (VFRECIP_D v2f64:$vj)>; -+ -+// VFRSQRT_{S/D} -+def : Pat<(fdiv vsplatf32_fpimm_eq_1, (fsqrt v4f32:$vj)), -+ (VFRSQRT_S v4f32:$vj)>; -+def : Pat<(fdiv vsplatf64_fpimm_eq_1, (fsqrt v2f64:$vj)), -+ (VFRSQRT_D v2f64:$vj)>; -+ - // VSEQ[I]_{B/H/W/D} - defm : PatCCVrSimm5; - defm : PatCCVrVr; -diff --git a/llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll b/llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll -new file mode 100644 -index 000000000000..c4a881bdeae9 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll -@@ -0,0 +1,65 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+;; fsqrt -+define void @sqrt_v8f32(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sqrt_v8f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvfsqrt.s $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: 
ret -+entry: -+ %v0 = load <8 x float>, ptr %a0, align 16 -+ %sqrt = call <8 x float> @llvm.sqrt.v8f32 (<8 x float> %v0) -+ store <8 x float> %sqrt, ptr %res, align 16 -+ ret void -+} -+ -+define void @sqrt_v4f64(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sqrt_v4f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvfsqrt.d $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0, align 16 -+ %sqrt = call <4 x double> @llvm.sqrt.v4f64 (<4 x double> %v0) -+ store <4 x double> %sqrt, ptr %res, align 16 -+ ret void -+} -+ -+;; 1.0 / (fsqrt vec) -+define void @one_div_sqrt_v8f32(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: one_div_sqrt_v8f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvfrsqrt.s $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0, align 16 -+ %sqrt = call <8 x float> @llvm.sqrt.v8f32 (<8 x float> %v0) -+ %div = fdiv <8 x float> , %sqrt -+ store <8 x float> %div, ptr %res, align 16 -+ ret void -+} -+ -+define void @one_div_sqrt_v4f64(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: one_div_sqrt_v4f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvfrsqrt.d $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0, align 16 -+ %sqrt = call <4 x double> @llvm.sqrt.v4f64 (<4 x double> %v0) -+ %div = fdiv <4 x double> , %sqrt -+ store <4 x double> %div, ptr %res, align 16 -+ ret void -+} -+ -+declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) -+declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll -index 284121a79a49..6004565b0b78 100644 ---- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll -@@ -32,3 +32,32 @@ entry: - store <4 x double> %v2, ptr %res - ret void - } -+ -+;; 1.0 / vec -+define void @one_fdiv_v8f32(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: one_fdiv_v8f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvfrecip.s $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %div = fdiv <8 x float> , %v0 -+ store <8 x float> %div, ptr %res -+ ret void -+} -+ -+define void @one_fdiv_v4f64(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: one_fdiv_v4f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvfrecip.d $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %div = fdiv <4 x double> , %v0 -+ store <4 x double> %div, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll b/llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll -new file mode 100644 -index 000000000000..a57bc1ca0e94 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll -@@ -0,0 +1,65 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+;; fsqrt -+define void @sqrt_v4f32(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sqrt_v4f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vfsqrt.s $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0, align 16 -+ %sqrt = call <4 x 
float> @llvm.sqrt.v4f32 (<4 x float> %v0) -+ store <4 x float> %sqrt, ptr %res, align 16 -+ ret void -+} -+ -+define void @sqrt_v2f64(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sqrt_v2f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vfsqrt.d $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0, align 16 -+ %sqrt = call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %v0) -+ store <2 x double> %sqrt, ptr %res, align 16 -+ ret void -+} -+ -+;; 1.0 / (fsqrt vec) -+define void @one_div_sqrt_v4f32(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: one_div_sqrt_v4f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vfrsqrt.s $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0, align 16 -+ %sqrt = call <4 x float> @llvm.sqrt.v4f32 (<4 x float> %v0) -+ %div = fdiv <4 x float> , %sqrt -+ store <4 x float> %div, ptr %res, align 16 -+ ret void -+} -+ -+define void @one_div_sqrt_v2f64(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: one_div_sqrt_v2f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vfrsqrt.d $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0, align 16 -+ %sqrt = call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %v0) -+ %div = fdiv <2 x double> , %sqrt -+ store <2 x double> %div, ptr %res, align 16 -+ ret void -+} -+ -+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) -+declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll -index eb7c8bd9616e..5f1ee9e4d212 100644 ---- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll -@@ -32,3 +32,32 @@ entry: - store <2 x double> %v2, ptr %res - ret void - } -+ -+;; 1.0 / vec -+define void @one_fdiv_v4f32(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: one_fdiv_v4f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vfrecip.s $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %div = fdiv <4 x float> , %v0 -+ store <4 x float> %div, ptr %res -+ ret void -+} -+ -+define void @one_fdiv_v2f64(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: one_fdiv_v2f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vfrecip.d $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %div = fdiv <2 x double> , %v0 -+ store <2 x double> %div, ptr %res -+ ret void -+} --- -2.20.1 - - -From 5942b745b9680284decadd33d2242ffd3d2d61c0 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Fri, 8 Dec 2023 14:21:10 +0800 -Subject: [PATCH 25/35] [LoongArch] Mark ISD::FNEG as legal - -(cherry picked from commit cdc37325669c0321328a7245083c427b229e79e9) ---- - .../LoongArch/LoongArchISelLowering.cpp | 2 ++ - .../LoongArch/LoongArchLASXInstrInfo.td | 4 +++ - .../Target/LoongArch/LoongArchLSXInstrInfo.td | 4 +++ - .../LoongArch/lasx/ir-instruction/fneg.ll | 29 +++++++++++++++++++ - .../LoongArch/lsx/ir-instruction/fneg.ll | 29 +++++++++++++++++++ - 5 files changed, 68 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fneg.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fneg.ll - -diff --git 
a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index 8c54c7cf2cab..c7f4b1d24f07 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -261,6 +261,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); - setOperationAction(ISD::FMA, VT, Legal); - setOperationAction(ISD::FSQRT, VT, Legal); -+ setOperationAction(ISD::FNEG, VT, Legal); - setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, - ISD::SETUGE, ISD::SETUGT}, - VT, Expand); -@@ -302,6 +303,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); - setOperationAction(ISD::FMA, VT, Legal); - setOperationAction(ISD::FSQRT, VT, Legal); -+ setOperationAction(ISD::FNEG, VT, Legal); - setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, - ISD::SETUGE, ISD::SETUGT}, - VT, Expand); -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index 55b90f4450c0..8559baa0e525 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -1605,6 +1605,10 @@ foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in - def : Pat<(vt (vselect LASX256:$xa, LASX256:$xk, LASX256:$xj)), - (XVBITSEL_V LASX256:$xj, LASX256:$xk, LASX256:$xa)>; - -+// fneg -+def : Pat<(fneg (v8f32 LASX256:$xj)), (XVBITREVI_W LASX256:$xj, 31)>; -+def : Pat<(fneg (v4f64 LASX256:$xj)), (XVBITREVI_D LASX256:$xj, 63)>; -+ - } // Predicates = [HasExtLASX] - - /// Intrinsic pattern -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index 8ad0c5904f25..5947f241bb59 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -1712,6 +1712,10 @@ foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in - def : Pat<(vt (vselect LSX128:$va, LSX128:$vk, LSX128:$vj)), - (VBITSEL_V LSX128:$vj, LSX128:$vk, LSX128:$va)>; - -+// fneg -+def : Pat<(fneg (v4f32 LSX128:$vj)), (VBITREVI_W LSX128:$vj, 31)>; -+def : Pat<(fneg (v2f64 LSX128:$vj)), (VBITREVI_D LSX128:$vj, 63)>; -+ - } // Predicates = [HasExtLSX] - - /// Intrinsic pattern -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fneg.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fneg.ll -new file mode 100644 -index 000000000000..5eb468fc55a0 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fneg.ll -@@ -0,0 +1,29 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @fneg_v8f32(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: fneg_v8f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvbitrevi.w $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = fneg <8 x float> %v0 -+ store <8 x float> %v1, ptr %res -+ ret void -+} -+define void @fneg_v4f64(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: fneg_v4f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvbitrevi.d $xr0, $xr0, 63 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = 
fneg <4 x double> %v0 -+ store <4 x double> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fneg.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fneg.ll -new file mode 100644 -index 000000000000..795c1ac8b368 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fneg.ll -@@ -0,0 +1,29 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @fneg_v4f32(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: fneg_v4f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vbitrevi.w $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = fneg <4 x float> %v0 -+ store <4 x float> %v1, ptr %res -+ ret void -+} -+define void @fneg_v2f64(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: fneg_v2f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vbitrevi.d $vr0, $vr0, 63 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = fneg <2 x double> %v0 -+ store <2 x double> %v1, ptr %res -+ ret void -+} --- -2.20.1 - - -From b8eb506d34e303ddc42bc4e8f304a81ba320dff2 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Mon, 11 Dec 2023 10:37:22 +0800 -Subject: [PATCH 26/35] [LoongArch] Add codegen support for - [X]VF{MSUB/NMADD/NMSUB}.{S/D} instructions (#74819) - -This is similar to single and double-precision floating-point -instructions. - -(cherry picked from commit af999c4be9f5643724c6f379690ecee4346b2b48) ---- - .../LoongArch/LoongArchLASXInstrInfo.td | 26 + - .../Target/LoongArch/LoongArchLSXInstrInfo.td | 26 + - llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll | 804 ++++++++++++++++++ - llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll | 804 ++++++++++++++++++ - llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll | 804 ++++++++++++++++++ - llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll | 804 ++++++++++++++++++ - 6 files changed, 3268 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll - -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index 8559baa0e525..ec6983d0f487 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -1455,6 +1455,32 @@ def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa), - def : Pat<(fma v4f64:$xj, v4f64:$xk, v4f64:$xa), - (XVFMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; - -+// XVFMSUB_{S/D} -+def : Pat<(fma v8f32:$xj, v8f32:$xk, (fneg v8f32:$xa)), -+ (XVFMSUB_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; -+def : Pat<(fma v4f64:$xj, v4f64:$xk, (fneg v4f64:$xa)), -+ (XVFMSUB_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; -+ -+// XVFNMADD_{S/D} -+def : Pat<(fneg (fma v8f32:$xj, v8f32:$xk, v8f32:$xa)), -+ (XVFNMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; -+def : Pat<(fneg (fma v4f64:$xj, v4f64:$xk, v4f64:$xa)), -+ (XVFNMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; -+def : Pat<(fma_nsz (fneg v8f32:$xj), v8f32:$xk, (fneg v8f32:$xa)), -+ (XVFNMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; -+def : Pat<(fma_nsz (fneg v4f64:$xj), v4f64:$xk, (fneg v4f64:$xa)), -+ (XVFNMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; -+ -+// XVFNMSUB_{S/D} -+def : 
Pat<(fneg (fma v8f32:$xj, v8f32:$xk, (fneg v8f32:$xa))), -+ (XVFNMSUB_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; -+def : Pat<(fneg (fma v4f64:$xj, v4f64:$xk, (fneg v4f64:$xa))), -+ (XVFNMSUB_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; -+def : Pat<(fma_nsz (fneg v8f32:$xj), v8f32:$xk, v8f32:$xa), -+ (XVFNMSUB_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; -+def : Pat<(fma_nsz (fneg v4f64:$xj), v4f64:$xk, v4f64:$xa), -+ (XVFNMSUB_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; -+ - // XVFSQRT_{S/D} - defm : PatXrF; - -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index 5947f241bb59..e468176885d7 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -1555,6 +1555,32 @@ def : Pat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va), - def : Pat<(fma v2f64:$vj, v2f64:$vk, v2f64:$va), - (VFMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; - -+// VFMSUB_{S/D} -+def : Pat<(fma v4f32:$vj, v4f32:$vk, (fneg v4f32:$va)), -+ (VFMSUB_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; -+def : Pat<(fma v2f64:$vj, v2f64:$vk, (fneg v2f64:$va)), -+ (VFMSUB_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; -+ -+// VFNMADD_{S/D} -+def : Pat<(fneg (fma v4f32:$vj, v4f32:$vk, v4f32:$va)), -+ (VFNMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; -+def : Pat<(fneg (fma v2f64:$vj, v2f64:$vk, v2f64:$va)), -+ (VFNMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; -+def : Pat<(fma_nsz (fneg v4f32:$vj), v4f32:$vk, (fneg v4f32:$va)), -+ (VFNMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; -+def : Pat<(fma_nsz (fneg v2f64:$vj), v2f64:$vk, (fneg v2f64:$va)), -+ (VFNMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; -+ -+// VFNMSUB_{S/D} -+def : Pat<(fneg (fma v4f32:$vj, v4f32:$vk, (fneg v4f32:$va))), -+ (VFNMSUB_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; -+def : Pat<(fneg (fma v2f64:$vj, v2f64:$vk, (fneg v2f64:$va))), -+ (VFNMSUB_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; -+def : Pat<(fma_nsz (fneg v4f32:$vj), v4f32:$vk, v4f32:$va), -+ (VFNMSUB_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; -+def : Pat<(fma_nsz (fneg v2f64:$vj), v2f64:$vk, v2f64:$va), -+ (VFNMSUB_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; -+ - // VFSQRT_{S/D} - defm : PatVrF; - -diff --git a/llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll b/llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll -new file mode 100644 -index 000000000000..af18c52b096c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll -@@ -0,0 +1,804 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=fast < %s \ -+; RUN: | FileCheck %s --check-prefix=CONTRACT-FAST -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=on < %s \ -+; RUN: | FileCheck %s --check-prefix=CONTRACT-ON -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=off < %s \ -+; RUN: | FileCheck %s --check-prefix=CONTRACT-OFF -+ -+define void @xvfmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfmadd_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfmadd_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-ON-NEXT: xvfadd.d $xr0, $xr0, $xr1 -+; 
CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfmadd_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-OFF-NEXT: xvfadd.d $xr0, $xr0, $xr1 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %mul = fmul<4 x double> %v0, %v1 -+ %add = fadd<4 x double> %mul, %v2 -+ store <4 x double> %add, ptr %res -+ ret void -+} -+ -+define void @xvfmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfmsub_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfmsub_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr1 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfmsub_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr1 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %mul = fmul<4 x double> %v0, %v1 -+ %sub = fsub<4 x double> %mul, %v2 -+ store <4 x double> %sub, ptr %res -+ ret void -+} -+ -+define void @xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfnmadd_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfnmadd_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-ON-NEXT: xvfadd.d $xr0, $xr0, $xr1 -+; CONTRACT-ON-NEXT: xvbitrevi.d $xr0, $xr0, 63 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfnmadd_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-OFF-NEXT: xvfadd.d $xr0, $xr0, $xr1 -+; CONTRACT-OFF-NEXT: xvbitrevi.d $xr0, $xr0, 63 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %mul = fmul<4 x double> %v0, %v1 -+ %add = fadd<4 x double> %mul, %v2 -+ %negadd = fneg<4 x double> %add -+ store <4 x double> %negadd, ptr %res -+ ret void -+} -+ -+define 
void @xvfnmadd_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfnmadd_d_nsz: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfnmadd_d_nsz: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-ON-NEXT: xvbitrevi.d $xr1, $xr1, 63 -+; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr1 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfnmadd_d_nsz: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-OFF-NEXT: xvbitrevi.d $xr1, $xr1, 63 -+; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr1 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %negv0 = fneg nsz<4 x double> %v0 -+ %negv2 = fneg nsz<4 x double> %v2 -+ %mul = fmul nsz<4 x double> %negv0, %v1 -+ %add = fadd nsz<4 x double> %mul, %negv2 -+ store <4 x double> %add, ptr %res -+ ret void -+} -+ -+;; Check that xvfnmadd.d is not emitted. -+define void @not_xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: not_xvfnmadd_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvbitrevi.d $xr2, $xr2, 63 -+; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: not_xvfnmadd_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-ON-NEXT: xvbitrevi.d $xr1, $xr1, 63 -+; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr1 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: not_xvfnmadd_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-OFF-NEXT: xvbitrevi.d $xr1, $xr1, 63 -+; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr1 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %negv0 = fneg<4 x double> %v0 -+ %negv2 = fneg<4 x double> %v2 -+ %mul = fmul<4 x double> %negv0, %v1 -+ %add = fadd<4 x double> %mul, %negv2 -+ store <4 x double> %add, ptr %res -+ ret void -+} -+ -+define void @xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfnmsub_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 -+; 
CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfnmsub_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr1 -+; CONTRACT-ON-NEXT: xvbitrevi.d $xr0, $xr0, 63 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfnmsub_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr1 -+; CONTRACT-OFF-NEXT: xvbitrevi.d $xr0, $xr0, 63 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %negv2 = fneg<4 x double> %v2 -+ %mul = fmul<4 x double> %v0, %v1 -+ %add = fadd<4 x double> %mul, %negv2 -+ %neg = fneg<4 x double> %add -+ store <4 x double> %neg, ptr %res -+ ret void -+} -+ -+define void @xvfnmsub_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfnmsub_d_nsz: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfnmsub_d_nsz: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfnmsub_d_nsz: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %negv0 = fneg nsz<4 x double> %v0 -+ %mul = fmul nsz<4 x double> %negv0, %v1 -+ %add = fadd nsz<4 x double> %mul, %v2 -+ store <4 x double> %add, ptr %res -+ ret void -+} -+ -+;; Check that xvfnmsub.d is not emitted. 
-+define void @not_xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: not_xvfnmsub_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvbitrevi.d $xr2, $xr2, 63 -+; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: not_xvfnmsub_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: not_xvfnmsub_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %negv0 = fneg<4 x double> %v0 -+ %mul = fmul<4 x double> %negv0, %v1 -+ %add = fadd<4 x double> %mul, %v2 -+ store <4 x double> %add, ptr %res -+ ret void -+} -+ -+define void @contract_xvfmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_xvfmadd_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_xvfmadd_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_xvfmadd_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %mul = fmul contract <4 x double> %v0, %v1 -+ %add = fadd contract <4 x double> %mul, %v2 -+ store <4 x double> %add, ptr %res -+ ret void -+} -+ -+define void @contract_xvfmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_xvfmsub_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_xvfmsub_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; 
-+; CONTRACT-OFF-LABEL: contract_xvfmsub_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %mul = fmul contract <4 x double> %v0, %v1 -+ %sub = fsub contract <4 x double> %mul, %v2 -+ store <4 x double> %sub, ptr %res -+ ret void -+} -+ -+define void @contract_xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_xvfnmadd_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_xvfnmadd_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_xvfnmadd_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %mul = fmul contract <4 x double> %v0, %v1 -+ %add = fadd contract <4 x double> %mul, %v2 -+ %negadd = fneg contract <4 x double> %add -+ store <4 x double> %negadd, ptr %res -+ ret void -+} -+ -+define void @contract_xvfnmadd_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_xvfnmadd_d_nsz: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_xvfnmadd_d_nsz: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_xvfnmadd_d_nsz: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %negv0 = fneg contract nsz<4 x double> %v0 -+ %negv2 = fneg contract nsz<4 x double> %v2 -+ %mul = fmul contract nsz<4 x double> %negv0, %v1 -+ %add = fadd contract nsz<4 x double> %mul, %negv2 -+ store <4 x double> %add, ptr %res -+ ret void -+} -+ -+;; Check that xvfnmadd.d is not emitted. 
-+define void @not_contract_xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: not_contract_xvfnmadd_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvbitrevi.d $xr2, $xr2, 63 -+; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: not_contract_xvfnmadd_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvbitrevi.d $xr2, $xr2, 63 -+; CONTRACT-ON-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: not_contract_xvfnmadd_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvbitrevi.d $xr2, $xr2, 63 -+; CONTRACT-OFF-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %negv0 = fneg contract <4 x double> %v0 -+ %negv2 = fneg contract <4 x double> %v2 -+ %mul = fmul contract <4 x double> %negv0, %v1 -+ %add = fadd contract <4 x double> %mul, %negv2 -+ store <4 x double> %add, ptr %res -+ ret void -+} -+ -+define void @contract_xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_xvfnmsub_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_xvfnmsub_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_xvfnmsub_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %negv2 = fneg contract <4 x double> %v2 -+ %mul = fmul contract <4 x double> %v0, %v1 -+ %add = fadd contract <4 x double> %mul, %negv2 -+ %neg = fneg contract <4 x double> %add -+ store <4 x double> %neg, ptr %res -+ ret void -+} -+ -+define void @contract_xvfnmsub_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_xvfnmsub_d_nsz: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_xvfnmsub_d_nsz: -+; CONTRACT-ON: # %bb.0: # %entry -+; 
CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_xvfnmsub_d_nsz: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %negv0 = fneg contract nsz<4 x double> %v0 -+ %mul = fmul contract nsz<4 x double> %negv0, %v1 -+ %add = fadd contract nsz<4 x double> %mul, %v2 -+ store <4 x double> %add, ptr %res -+ ret void -+} -+ -+;; Check that xvfnmsub.d is not emitted. -+define void @not_contract_xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: not_contract_xvfnmsub_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvbitrevi.d $xr2, $xr2, 63 -+; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: not_contract_xvfnmsub_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvbitrevi.d $xr2, $xr2, 63 -+; CONTRACT-ON-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: not_contract_xvfnmsub_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvbitrevi.d $xr2, $xr2, 63 -+; CONTRACT-OFF-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %negv0 = fneg contract <4 x double> %v0 -+ %mul = fmul contract <4 x double> %negv0, %v1 -+ %add = fadd contract <4 x double> %mul, %v2 -+ store <4 x double> %add, ptr %res -+ ret void -+} -+ -+define void @xvfmadd_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfmadd_d_contract: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfmadd_d_contract: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfmadd_d_contract: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; 
CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %mul = fmul contract <4 x double> %v0, %v1 -+ %add = fadd contract <4 x double> %mul, %v2 -+ store <4 x double> %add, ptr %res -+ ret void -+} -+ -+define void @xvfmsub_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfmsub_d_contract: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfmsub_d_contract: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfmsub_d_contract: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %mul = fmul contract <4 x double> %v0, %v1 -+ %sub = fsub contract <4 x double> %mul, %v2 -+ store <4 x double> %sub, ptr %res -+ ret void -+} -+ -+define void @xvfnmadd_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfnmadd_d_contract: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfnmadd_d_contract: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfnmadd_d_contract: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %mul = fmul contract <4 x double> %v0, %v1 -+ %add = fadd contract <4 x double> %mul, %v2 -+ %negadd = fneg contract <4 x double> %add -+ store <4 x double> %negadd, ptr %res -+ ret void -+} -+ -+define void @xvfnmsub_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfnmsub_d_contract: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfnmsub_d_contract: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, 
$a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfnmsub_d_contract: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %mul = fmul contract <4 x double> %v0, %v1 -+ %negv2 = fneg contract <4 x double> %v2 -+ %add = fadd contract <4 x double> %negv2, %mul -+ %negadd = fneg contract <4 x double> %add -+ store <4 x double> %negadd, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll b/llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll -new file mode 100644 -index 000000000000..b7b3cb3a2e66 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll -@@ -0,0 +1,804 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=fast < %s \ -+; RUN: | FileCheck %s --check-prefix=CONTRACT-FAST -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=on < %s \ -+; RUN: | FileCheck %s --check-prefix=CONTRACT-ON -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=off < %s \ -+; RUN: | FileCheck %s --check-prefix=CONTRACT-OFF -+ -+define void @xvfmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfmadd_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfmadd_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-ON-NEXT: xvfadd.s $xr0, $xr0, $xr1 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfmadd_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-OFF-NEXT: xvfadd.s $xr0, $xr0, $xr1 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %mul = fmul<8 x float> %v0, %v1 -+ %add = fadd<8 x float> %mul, %v2 -+ store <8 x float> %add, ptr %res -+ ret void -+} -+ -+define void @xvfmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfmsub_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfmsub_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmul.s 
$xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr1 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfmsub_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr1 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %mul = fmul<8 x float> %v0, %v1 -+ %sub = fsub<8 x float> %mul, %v2 -+ store <8 x float> %sub, ptr %res -+ ret void -+} -+ -+define void @xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfnmadd_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfnmadd_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-ON-NEXT: xvfadd.s $xr0, $xr0, $xr1 -+; CONTRACT-ON-NEXT: xvbitrevi.w $xr0, $xr0, 31 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfnmadd_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-OFF-NEXT: xvfadd.s $xr0, $xr0, $xr1 -+; CONTRACT-OFF-NEXT: xvbitrevi.w $xr0, $xr0, 31 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %mul = fmul<8 x float> %v0, %v1 -+ %add = fadd<8 x float> %mul, %v2 -+ %negadd = fneg<8 x float> %add -+ store <8 x float> %negadd, ptr %res -+ ret void -+} -+ -+define void @xvfnmadd_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfnmadd_s_nsz: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfnmadd_s_nsz: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-ON-NEXT: xvbitrevi.w $xr1, $xr1, 31 -+; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr1 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfnmadd_s_nsz: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-OFF-NEXT: xvbitrevi.w $xr1, $xr1, 31 -+; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr1 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr 
%a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %negv0 = fneg nsz<8 x float> %v0 -+ %negv2 = fneg nsz<8 x float> %v2 -+ %mul = fmul nsz<8 x float> %negv0, %v1 -+ %add = fadd nsz<8 x float> %mul, %negv2 -+ store <8 x float> %add, ptr %res -+ ret void -+} -+ -+;; Check that fnmadd.s is not emitted. -+define void @not_xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: not_xvfnmadd_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvbitrevi.w $xr2, $xr2, 31 -+; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: not_xvfnmadd_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-ON-NEXT: xvbitrevi.w $xr1, $xr1, 31 -+; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr1 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: not_xvfnmadd_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-OFF-NEXT: xvbitrevi.w $xr1, $xr1, 31 -+; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr1 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %negv0 = fneg<8 x float> %v0 -+ %negv2 = fneg<8 x float> %v2 -+ %mul = fmul<8 x float> %negv0, %v1 -+ %add = fadd<8 x float> %mul, %negv2 -+ store <8 x float> %add, ptr %res -+ ret void -+} -+ -+define void @xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfnmsub_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfnmsub_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr1 -+; CONTRACT-ON-NEXT: xvbitrevi.w $xr0, $xr0, 31 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfnmsub_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr1 -+; CONTRACT-OFF-NEXT: xvbitrevi.w $xr0, $xr0, 31 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %negv2 = fneg<8 x float> %v2 -+ %mul = fmul<8 x float> %v0, %v1 -+ %add = fadd<8 x float> %mul, %negv2 -+ %neg = fneg<8 x float> %add -+ store <8 x float> %neg, ptr %res -+ ret void -+} -+ -+define void @xvfnmsub_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; 
CONTRACT-FAST-LABEL: xvfnmsub_s_nsz: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfnmsub_s_nsz: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfnmsub_s_nsz: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %negv0 = fneg nsz<8 x float> %v0 -+ %mul = fmul nsz<8 x float> %negv0, %v1 -+ %add = fadd nsz<8 x float> %mul, %v2 -+ store <8 x float> %add, ptr %res -+ ret void -+} -+ -+;; Check that fnmsub.s is not emitted. -+define void @not_xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: not_xvfnmsub_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvbitrevi.w $xr2, $xr2, 31 -+; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: not_xvfnmsub_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: not_xvfnmsub_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %negv0 = fneg<8 x float> %v0 -+ %mul = fmul<8 x float> %negv0, %v1 -+ %add = fadd<8 x float> %mul, %v2 -+ store <8 x float> %add, ptr %res -+ ret void -+} -+ -+define void @contract_xvfmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_xvfmadd_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_xvfmadd_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst 
$xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_xvfmadd_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %mul = fmul contract <8 x float> %v0, %v1 -+ %add = fadd contract <8 x float> %mul, %v2 -+ store <8 x float> %add, ptr %res -+ ret void -+} -+ -+define void @contract_xvfmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_xvfmsub_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_xvfmsub_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_xvfmsub_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %mul = fmul contract <8 x float> %v0, %v1 -+ %sub = fsub contract <8 x float> %mul, %v2 -+ store <8 x float> %sub, ptr %res -+ ret void -+} -+ -+define void @contract_xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_xvfnmadd_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_xvfnmadd_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_xvfnmadd_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %mul = fmul contract <8 x float> %v0, %v1 -+ %add = fadd contract <8 x float> %mul, %v2 -+ %negadd = fneg contract <8 x float> %add -+ store <8 x float> %negadd, ptr %res -+ ret void -+} -+ -+define void @contract_xvfnmadd_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_xvfnmadd_s_nsz: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, 
$a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_xvfnmadd_s_nsz: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_xvfnmadd_s_nsz: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %negv0 = fneg contract nsz<8 x float> %v0 -+ %negv2 = fneg contract nsz<8 x float> %v2 -+ %mul = fmul contract nsz<8 x float> %negv0, %v1 -+ %add = fadd contract nsz<8 x float> %mul, %negv2 -+ store <8 x float> %add, ptr %res -+ ret void -+} -+ -+;; Check that fnmadd.s is not emitted. -+define void @not_contract_xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: not_contract_xvfnmadd_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvbitrevi.w $xr2, $xr2, 31 -+; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: not_contract_xvfnmadd_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvbitrevi.w $xr2, $xr2, 31 -+; CONTRACT-ON-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: not_contract_xvfnmadd_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvbitrevi.w $xr2, $xr2, 31 -+; CONTRACT-OFF-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %negv0 = fneg contract <8 x float> %v0 -+ %negv2 = fneg contract <8 x float> %v2 -+ %mul = fmul contract <8 x float> %negv0, %v1 -+ %add = fadd contract <8 x float> %mul, %negv2 -+ store <8 x float> %add, ptr %res -+ ret void -+} -+ -+define void @contract_xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_xvfnmsub_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_xvfnmsub_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, 
$xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_xvfnmsub_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %negv2 = fneg contract <8 x float> %v2 -+ %mul = fmul contract <8 x float> %v0, %v1 -+ %add = fadd contract <8 x float> %mul, %negv2 -+ %neg = fneg contract <8 x float> %add -+ store <8 x float> %neg, ptr %res -+ ret void -+} -+ -+define void @contract_xvfnmsub_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_xvfnmsub_s_nsz: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_xvfnmsub_s_nsz: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_xvfnmsub_s_nsz: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %negv0 = fneg contract nsz<8 x float> %v0 -+ %mul = fmul contract nsz<8 x float> %negv0, %v1 -+ %add = fadd contract nsz<8 x float> %mul, %v2 -+ store <8 x float> %add, ptr %res -+ ret void -+} -+ -+;; Check that fnmsub.s is not emitted. 
-+define void @not_contract_xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: not_contract_xvfnmsub_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvbitrevi.w $xr2, $xr2, 31 -+; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: not_contract_xvfnmsub_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvbitrevi.w $xr2, $xr2, 31 -+; CONTRACT-ON-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: not_contract_xvfnmsub_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvbitrevi.w $xr2, $xr2, 31 -+; CONTRACT-OFF-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %negv0 = fneg contract <8 x float> %v0 -+ %mul = fmul contract <8 x float> %negv0, %v1 -+ %add = fadd contract <8 x float> %mul, %v2 -+ store <8 x float> %add, ptr %res -+ ret void -+} -+ -+define void @xvfmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfmadd_s_contract: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfmadd_s_contract: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfmadd_s_contract: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %mul = fmul contract <8 x float> %v0, %v1 -+ %add = fadd contract <8 x float> %mul, %v2 -+ store <8 x float> %add, ptr %res -+ ret void -+} -+ -+define void @xvfmsub_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfmsub_s_contract: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfmsub_s_contract: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 -+; 
CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfmsub_s_contract: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %mul = fmul contract <8 x float> %v0, %v1 -+ %sub = fsub contract <8 x float> %mul, %v2 -+ store <8 x float> %sub, ptr %res -+ ret void -+} -+ -+define void @xvfnmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfnmadd_s_contract: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfnmadd_s_contract: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfnmadd_s_contract: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %mul = fmul contract <8 x float> %v0, %v1 -+ %add = fadd contract <8 x float> %mul, %v2 -+ %negadd = fneg contract <8 x float> %add -+ store <8 x float> %negadd, ptr %res -+ ret void -+} -+ -+define void @xvfnmsub_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfnmsub_s_contract: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfnmsub_s_contract: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfnmsub_s_contract: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %mul = fmul contract <8 x float> %v0, %v1 -+ %negv2 = fneg contract <8 x float> %v2 -+ %add = fadd contract <8 x float> %negv2, %mul -+ %negadd = fneg contract <8 x float> %add -+ store <8 x float> %negadd, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll 
b/llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll -new file mode 100644 -index 000000000000..8e0459b4afab ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll -@@ -0,0 +1,804 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=fast < %s \ -+; RUN: | FileCheck %s --check-prefix=CONTRACT-FAST -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=on < %s \ -+; RUN: | FileCheck %s --check-prefix=CONTRACT-ON -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=off < %s \ -+; RUN: | FileCheck %s --check-prefix=CONTRACT-OFF -+ -+define void @vfmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfmadd_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfmadd_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-ON-NEXT: vfadd.d $vr0, $vr0, $vr1 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfmadd_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-OFF-NEXT: vfadd.d $vr0, $vr0, $vr1 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %mul = fmul<2 x double> %v0, %v1 -+ %add = fadd<2 x double> %mul, %v2 -+ store <2 x double> %add, ptr %res -+ ret void -+} -+ -+define void @vfmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfmsub_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfmsub_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr0, $vr1 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfmsub_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr0, $vr1 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %mul = fmul<2 x double> %v0, %v1 -+ %sub = fsub<2 x double> %mul, %v2 -+ store <2 x double> %sub, ptr %res -+ ret void -+} -+ -+define void @vfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfnmadd_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, 
$a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfnmadd_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-ON-NEXT: vfadd.d $vr0, $vr0, $vr1 -+; CONTRACT-ON-NEXT: vbitrevi.d $vr0, $vr0, 63 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfnmadd_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-OFF-NEXT: vfadd.d $vr0, $vr0, $vr1 -+; CONTRACT-OFF-NEXT: vbitrevi.d $vr0, $vr0, 63 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %mul = fmul<2 x double> %v0, %v1 -+ %add = fadd<2 x double> %mul, %v2 -+ %negadd = fneg<2 x double> %add -+ store <2 x double> %negadd, ptr %res -+ ret void -+} -+ -+define void @vfnmadd_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfnmadd_d_nsz: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfnmadd_d_nsz: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-ON-NEXT: vbitrevi.d $vr1, $vr1, 63 -+; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr0, $vr1 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfnmadd_d_nsz: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-OFF-NEXT: vbitrevi.d $vr1, $vr1, 63 -+; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr0, $vr1 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %negv0 = fneg nsz<2 x double> %v0 -+ %negv2 = fneg nsz<2 x double> %v2 -+ %mul = fmul nsz<2 x double> %negv0, %v1 -+ %add = fadd nsz<2 x double> %mul, %negv2 -+ store <2 x double> %add, ptr %res -+ ret void -+} -+ -+;; Check that vfnmadd.d is not emitted. 
-+define void @not_vfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: not_vfnmadd_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vbitrevi.d $vr2, $vr2, 63 -+; CONTRACT-FAST-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: not_vfnmadd_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-ON-NEXT: vbitrevi.d $vr1, $vr1, 63 -+; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr0, $vr1 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: not_vfnmadd_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-OFF-NEXT: vbitrevi.d $vr1, $vr1, 63 -+; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr0, $vr1 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %negv0 = fneg<2 x double> %v0 -+ %negv2 = fneg<2 x double> %v2 -+ %mul = fmul<2 x double> %negv0, %v1 -+ %add = fadd<2 x double> %mul, %negv2 -+ store <2 x double> %add, ptr %res -+ ret void -+} -+ -+define void @vfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfnmsub_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfnmsub_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr0, $vr1 -+; CONTRACT-ON-NEXT: vbitrevi.d $vr0, $vr0, 63 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfnmsub_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr0, $vr1 -+; CONTRACT-OFF-NEXT: vbitrevi.d $vr0, $vr0, 63 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %negv2 = fneg<2 x double> %v2 -+ %mul = fmul<2 x double> %v0, %v1 -+ %add = fadd<2 x double> %mul, %negv2 -+ %neg = fneg<2 x double> %add -+ store <2 x double> %neg, ptr %res -+ ret void -+} -+ -+define void @vfnmsub_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfnmsub_d_nsz: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfnmsub_d_nsz: -+; 
CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfnmsub_d_nsz: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %negv0 = fneg nsz<2 x double> %v0 -+ %mul = fmul nsz<2 x double> %negv0, %v1 -+ %add = fadd nsz<2 x double> %mul, %v2 -+ store <2 x double> %add, ptr %res -+ ret void -+} -+ -+;; Check that vfnmsub.d is not emitted. -+define void @not_vfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: not_vfnmsub_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vbitrevi.d $vr2, $vr2, 63 -+; CONTRACT-FAST-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: not_vfnmsub_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: not_vfnmsub_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %negv0 = fneg<2 x double> %v0 -+ %mul = fmul<2 x double> %negv0, %v1 -+ %add = fadd<2 x double> %mul, %v2 -+ store <2 x double> %add, ptr %res -+ ret void -+} -+ -+define void @contract_vfmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_vfmadd_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_vfmadd_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_vfmadd_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ 
%v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %mul = fmul contract <2 x double> %v0, %v1 -+ %add = fadd contract <2 x double> %mul, %v2 -+ store <2 x double> %add, ptr %res -+ ret void -+} -+ -+define void @contract_vfmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_vfmsub_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_vfmsub_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_vfmsub_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %mul = fmul contract <2 x double> %v0, %v1 -+ %sub = fsub contract <2 x double> %mul, %v2 -+ store <2 x double> %sub, ptr %res -+ ret void -+} -+ -+define void @contract_vfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_vfnmadd_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_vfnmadd_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_vfnmadd_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %mul = fmul contract <2 x double> %v0, %v1 -+ %add = fadd contract <2 x double> %mul, %v2 -+ %negadd = fneg contract <2 x double> %add -+ store <2 x double> %negadd, ptr %res -+ ret void -+} -+ -+define void @contract_vfnmadd_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_vfnmadd_d_nsz: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_vfnmadd_d_nsz: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: 
vfnmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_vfnmadd_d_nsz: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %negv0 = fneg contract nsz<2 x double> %v0 -+ %negv2 = fneg contract nsz<2 x double> %v2 -+ %mul = fmul contract nsz<2 x double> %negv0, %v1 -+ %add = fadd contract nsz<2 x double> %mul, %negv2 -+ store <2 x double> %add, ptr %res -+ ret void -+} -+ -+;; Check that vfnmadd.d is not emitted. -+define void @not_contract_vfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: not_contract_vfnmadd_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vbitrevi.d $vr2, $vr2, 63 -+; CONTRACT-FAST-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: not_contract_vfnmadd_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vbitrevi.d $vr2, $vr2, 63 -+; CONTRACT-ON-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: not_contract_vfnmadd_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vbitrevi.d $vr2, $vr2, 63 -+; CONTRACT-OFF-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %negv0 = fneg contract <2 x double> %v0 -+ %negv2 = fneg contract <2 x double> %v2 -+ %mul = fmul contract <2 x double> %negv0, %v1 -+ %add = fadd contract <2 x double> %mul, %negv2 -+ store <2 x double> %add, ptr %res -+ ret void -+} -+ -+define void @contract_vfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_vfnmsub_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_vfnmsub_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_vfnmsub_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr 
%a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %negv2 = fneg contract <2 x double> %v2 -+ %mul = fmul contract <2 x double> %v0, %v1 -+ %add = fadd contract <2 x double> %mul, %negv2 -+ %neg = fneg contract <2 x double> %add -+ store <2 x double> %neg, ptr %res -+ ret void -+} -+ -+define void @contract_vfnmsub_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_vfnmsub_d_nsz: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_vfnmsub_d_nsz: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_vfnmsub_d_nsz: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %negv0 = fneg contract nsz<2 x double> %v0 -+ %mul = fmul contract nsz<2 x double> %negv0, %v1 -+ %add = fadd contract nsz<2 x double> %mul, %v2 -+ store <2 x double> %add, ptr %res -+ ret void -+} -+ -+;; Check that vfnmsub.d is not emitted. -+define void @not_contract_vfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: not_contract_vfnmsub_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vbitrevi.d $vr2, $vr2, 63 -+; CONTRACT-FAST-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: not_contract_vfnmsub_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vbitrevi.d $vr2, $vr2, 63 -+; CONTRACT-ON-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: not_contract_vfnmsub_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vbitrevi.d $vr2, $vr2, 63 -+; CONTRACT-OFF-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %negv0 = fneg contract <2 x double> %v0 -+ %mul = fmul contract <2 x double> %negv0, %v1 -+ %add = fadd contract <2 x double> %mul, %v2 -+ store <2 x double> %add, ptr %res -+ ret void -+} -+ -+define void @vfmadd_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfmadd_d_contract: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: 
vfmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfmadd_d_contract: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfmadd_d_contract: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %mul = fmul contract <2 x double> %v0, %v1 -+ %add = fadd contract <2 x double> %mul, %v2 -+ store <2 x double> %add, ptr %res -+ ret void -+} -+ -+define void @vfmsub_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfmsub_d_contract: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfmsub_d_contract: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfmsub_d_contract: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %mul = fmul contract <2 x double> %v0, %v1 -+ %sub = fsub contract <2 x double> %mul, %v2 -+ store <2 x double> %sub, ptr %res -+ ret void -+} -+ -+define void @vfnmadd_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfnmadd_d_contract: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfnmadd_d_contract: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfnmadd_d_contract: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %mul = fmul contract <2 x double> %v0, %v1 -+ 
%add = fadd contract <2 x double> %mul, %v2 -+ %negadd = fneg contract <2 x double> %add -+ store <2 x double> %negadd, ptr %res -+ ret void -+} -+ -+define void @vfnmsub_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfnmsub_d_contract: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfnmsub_d_contract: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfnmsub_d_contract: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %mul = fmul contract <2 x double> %v0, %v1 -+ %negv2 = fneg contract <2 x double> %v2 -+ %add = fadd contract <2 x double> %negv2, %mul -+ %negadd = fneg contract <2 x double> %add -+ store <2 x double> %negadd, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll b/llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll -new file mode 100644 -index 000000000000..7efbd61c0c4f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll -@@ -0,0 +1,804 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=fast < %s \ -+; RUN: | FileCheck %s --check-prefix=CONTRACT-FAST -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=on < %s \ -+; RUN: | FileCheck %s --check-prefix=CONTRACT-ON -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=off < %s \ -+; RUN: | FileCheck %s --check-prefix=CONTRACT-OFF -+ -+define void @vfmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfmadd_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfmadd_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-ON-NEXT: vfadd.s $vr0, $vr0, $vr1 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfmadd_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-OFF-NEXT: vfadd.s $vr0, $vr0, $vr1 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %mul = fmul<4 x float> %v0, %v1 -+ %add = fadd<4 x float> %mul, %v2 -+ 
store <4 x float> %add, ptr %res -+ ret void -+} -+ -+define void @vfmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfmsub_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfmsub_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr0, $vr1 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfmsub_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr0, $vr1 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %mul = fmul<4 x float> %v0, %v1 -+ %sub = fsub<4 x float> %mul, %v2 -+ store <4 x float> %sub, ptr %res -+ ret void -+} -+ -+define void @vfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfnmadd_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfnmadd_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-ON-NEXT: vfadd.s $vr0, $vr0, $vr1 -+; CONTRACT-ON-NEXT: vbitrevi.w $vr0, $vr0, 31 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfnmadd_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-OFF-NEXT: vfadd.s $vr0, $vr0, $vr1 -+; CONTRACT-OFF-NEXT: vbitrevi.w $vr0, $vr0, 31 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %mul = fmul<4 x float> %v0, %v1 -+ %add = fadd<4 x float> %mul, %v2 -+ %negadd = fneg<4 x float> %add -+ store <4 x float> %negadd, ptr %res -+ ret void -+} -+ -+define void @vfnmadd_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfnmadd_s_nsz: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfnmadd_s_nsz: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-ON-NEXT: vbitrevi.w $vr1, $vr1, 31 -+; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vld $vr1, $a3, 
0 -+; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr0, $vr1 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfnmadd_s_nsz: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-OFF-NEXT: vbitrevi.w $vr1, $vr1, 31 -+; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr0, $vr1 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %negv0 = fneg nsz<4 x float> %v0 -+ %negv2 = fneg nsz<4 x float> %v2 -+ %mul = fmul nsz<4 x float> %negv0, %v1 -+ %add = fadd nsz<4 x float> %mul, %negv2 -+ store <4 x float> %add, ptr %res -+ ret void -+} -+ -+;; Check that vfnmadd.s is not emitted. -+define void @not_vfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: not_vfnmadd_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vbitrevi.w $vr2, $vr2, 31 -+; CONTRACT-FAST-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: not_vfnmadd_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-ON-NEXT: vbitrevi.w $vr1, $vr1, 31 -+; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr0, $vr1 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: not_vfnmadd_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-OFF-NEXT: vbitrevi.w $vr1, $vr1, 31 -+; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr0, $vr1 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %negv0 = fneg<4 x float> %v0 -+ %negv2 = fneg<4 x float> %v2 -+ %mul = fmul<4 x float> %negv0, %v1 -+ %add = fadd<4 x float> %mul, %negv2 -+ store <4 x float> %add, ptr %res -+ ret void -+} -+ -+define void @vfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfnmsub_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfnmsub_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr0, $vr1 -+; CONTRACT-ON-NEXT: vbitrevi.w $vr0, $vr0, 31 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfnmsub_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr0, $vr1 -+; 
CONTRACT-OFF-NEXT: vbitrevi.w $vr0, $vr0, 31 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %negv2 = fneg<4 x float> %v2 -+ %mul = fmul<4 x float> %v0, %v1 -+ %add = fadd<4 x float> %mul, %negv2 -+ %neg = fneg<4 x float> %add -+ store <4 x float> %neg, ptr %res -+ ret void -+} -+ -+define void @vfnmsub_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfnmsub_s_nsz: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfnmsub_s_nsz: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfnmsub_s_nsz: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %negv0 = fneg nsz<4 x float> %v0 -+ %mul = fmul nsz<4 x float> %negv0, %v1 -+ %add = fadd nsz<4 x float> %mul, %v2 -+ store <4 x float> %add, ptr %res -+ ret void -+} -+ -+;; Check that vfnmsub.s is not emitted. 
-+define void @not_vfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: not_vfnmsub_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vbitrevi.w $vr2, $vr2, 31 -+; CONTRACT-FAST-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: not_vfnmsub_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: not_vfnmsub_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %negv0 = fneg<4 x float> %v0 -+ %mul = fmul<4 x float> %negv0, %v1 -+ %add = fadd<4 x float> %mul, %v2 -+ store <4 x float> %add, ptr %res -+ ret void -+} -+ -+define void @contract_vfmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_vfmadd_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_vfmadd_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_vfmadd_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %mul = fmul contract <4 x float> %v0, %v1 -+ %add = fadd contract <4 x float> %mul, %v2 -+ store <4 x float> %add, ptr %res -+ ret void -+} -+ -+define void @contract_vfmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_vfmsub_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_vfmsub_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_vfmsub_s: -+; CONTRACT-OFF: # %bb.0: 
# %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %mul = fmul contract <4 x float> %v0, %v1 -+ %sub = fsub contract <4 x float> %mul, %v2 -+ store <4 x float> %sub, ptr %res -+ ret void -+} -+ -+define void @contract_vfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_vfnmadd_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_vfnmadd_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_vfnmadd_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %mul = fmul contract <4 x float> %v0, %v1 -+ %add = fadd contract <4 x float> %mul, %v2 -+ %negadd = fneg contract <4 x float> %add -+ store <4 x float> %negadd, ptr %res -+ ret void -+} -+ -+define void @contract_vfnmadd_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_vfnmadd_s_nsz: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_vfnmadd_s_nsz: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_vfnmadd_s_nsz: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %negv0 = fneg contract nsz<4 x float> %v0 -+ %negv2 = fneg contract nsz<4 x float> %v2 -+ %mul = fmul contract nsz<4 x float> %negv0, %v1 -+ %add = fadd contract nsz<4 x float> %mul, %negv2 -+ store <4 x float> %add, ptr %res -+ ret void -+} -+ -+;; Check that vfnmadd.s is not emitted. 
-+define void @not_contract_vfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: not_contract_vfnmadd_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vbitrevi.w $vr2, $vr2, 31 -+; CONTRACT-FAST-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: not_contract_vfnmadd_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vbitrevi.w $vr2, $vr2, 31 -+; CONTRACT-ON-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: not_contract_vfnmadd_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vbitrevi.w $vr2, $vr2, 31 -+; CONTRACT-OFF-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %negv0 = fneg contract <4 x float> %v0 -+ %negv2 = fneg contract <4 x float> %v2 -+ %mul = fmul contract <4 x float> %negv0, %v1 -+ %add = fadd contract <4 x float> %mul, %negv2 -+ store <4 x float> %add, ptr %res -+ ret void -+} -+ -+define void @contract_vfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_vfnmsub_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_vfnmsub_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_vfnmsub_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %negv2 = fneg contract <4 x float> %v2 -+ %mul = fmul contract <4 x float> %v0, %v1 -+ %add = fadd contract <4 x float> %mul, %negv2 -+ %neg = fneg contract <4 x float> %add -+ store <4 x float> %neg, ptr %res -+ ret void -+} -+ -+define void @contract_vfnmsub_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_vfnmsub_s_nsz: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_vfnmsub_s_nsz: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, 
$a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_vfnmsub_s_nsz: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %negv0 = fneg contract nsz<4 x float> %v0 -+ %mul = fmul contract nsz<4 x float> %negv0, %v1 -+ %add = fadd contract nsz<4 x float> %mul, %v2 -+ store <4 x float> %add, ptr %res -+ ret void -+} -+ -+;; Check that vfnmsub.s is not emitted. -+define void @not_contract_vfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: not_contract_vfnmsub_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vbitrevi.w $vr2, $vr2, 31 -+; CONTRACT-FAST-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: not_contract_vfnmsub_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vbitrevi.w $vr2, $vr2, 31 -+; CONTRACT-ON-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: not_contract_vfnmsub_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vbitrevi.w $vr2, $vr2, 31 -+; CONTRACT-OFF-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %negv0 = fneg contract <4 x float> %v0 -+ %mul = fmul contract <4 x float> %negv0, %v1 -+ %add = fadd contract <4 x float> %mul, %v2 -+ store <4 x float> %add, ptr %res -+ ret void -+} -+ -+define void @vfmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfmadd_s_contract: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfmadd_s_contract: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfmadd_s_contract: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %mul = 
fmul contract <4 x float> %v0, %v1 -+ %add = fadd contract <4 x float> %mul, %v2 -+ store <4 x float> %add, ptr %res -+ ret void -+} -+ -+define void @vfmsub_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfmsub_s_contract: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfmsub_s_contract: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfmsub_s_contract: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %mul = fmul contract <4 x float> %v0, %v1 -+ %sub = fsub contract <4 x float> %mul, %v2 -+ store <4 x float> %sub, ptr %res -+ ret void -+} -+ -+define void @vfnmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfnmadd_s_contract: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfnmadd_s_contract: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfnmadd_s_contract: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %mul = fmul contract <4 x float> %v0, %v1 -+ %add = fadd contract <4 x float> %mul, %v2 -+ %negadd = fneg contract <4 x float> %add -+ store <4 x float> %negadd, ptr %res -+ ret void -+} -+ -+define void @vfnmsub_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfnmsub_s_contract: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfnmsub_s_contract: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; 
CONTRACT-OFF-LABEL: vfnmsub_s_contract: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %mul = fmul contract <4 x float> %v0, %v1 -+ %negv2 = fneg contract <4 x float> %v2 -+ %add = fadd contract <4 x float> %negv2, %mul -+ %negadd = fneg contract <4 x float> %add -+ store <4 x float> %negadd, ptr %res -+ ret void -+} --- -2.20.1 - - -From 8aa8ce5abc7bf58ef9ae0460d1e9ed705895a887 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Mon, 25 Dec 2023 10:09:20 +0800 -Subject: [PATCH 27/35] [LoongArch] Fix LASX vector_extract codegen - -Custom lowering `ISD::EXTRACT_VECTOR_ELT` with lasx. - -(cherry picked from commit 47c88bcd5de91522241cca1aaa1b7762ceb01394) ---- - .../LoongArch/LoongArchISelLowering.cpp | 21 +++- - .../Target/LoongArch/LoongArchISelLowering.h | 1 + - .../LoongArch/LoongArchLASXInstrInfo.td | 40 ++---- - .../lasx/ir-instruction/extractelement.ll | 114 ++++++++++++++---- - 4 files changed, 119 insertions(+), 57 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index c7f4b1d24f07..cf881ce720a6 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -277,7 +277,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction(ISD::UNDEF, VT, Legal); - - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); -- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); -+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - - setOperationAction(ISD::SETCC, VT, Legal); -@@ -395,6 +395,8 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, - return lowerWRITE_REGISTER(Op, DAG); - case ISD::INSERT_VECTOR_ELT: - return lowerINSERT_VECTOR_ELT(Op, DAG); -+ case ISD::EXTRACT_VECTOR_ELT: -+ return lowerEXTRACT_VECTOR_ELT(Op, DAG); - case ISD::BUILD_VECTOR: - return lowerBUILD_VECTOR(Op, DAG); - case ISD::VECTOR_SHUFFLE: -@@ -502,6 +504,23 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op, - return SDValue(); - } - -+SDValue -+LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, -+ SelectionDAG &DAG) const { -+ EVT VecTy = Op->getOperand(0)->getValueType(0); -+ SDValue Idx = Op->getOperand(1); -+ EVT EltTy = VecTy.getVectorElementType(); -+ unsigned NumElts = VecTy.getVectorNumElements(); -+ -+ if (isa(Idx) && -+ (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 || -+ EltTy == MVT::f64 || -+ cast(Idx)->getZExtValue() < NumElts / 2)) -+ return Op; -+ -+ return SDValue(); -+} -+ - SDValue - LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, - SelectionDAG &DAG) const { -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -index 2c35f9e5d378..6b5a851ec55d 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -@@ -279,6 +279,7 @@ private: - SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const; -+ 
SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index ec6983d0f487..9b7a34688811 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -1590,38 +1590,14 @@ def : Pat<(i64 (vector_extract v32i8:$xj, uimm4:$imm)), - (VPICKVE2GR_B (EXTRACT_SUBREG v32i8:$xj, sub_128), uimm4:$imm)>; - def : Pat<(i64 (vector_extract v16i16:$xj, uimm3:$imm)), - (VPICKVE2GR_H (EXTRACT_SUBREG v16i16:$xj, sub_128), uimm3:$imm)>; --def : Pat<(i64 (vector_extract v8i32:$xj, uimm2:$imm)), -- (VPICKVE2GR_W (EXTRACT_SUBREG v8i32:$xj, sub_128), uimm2:$imm)>; --def : Pat<(i64 (vector_extract v4i64:$xj, uimm1:$imm)), -- (VPICKVE2GR_D (EXTRACT_SUBREG v4i64:$xj, sub_128), uimm1:$imm)>; --def : Pat<(f32 (vector_extract v8f32:$xj, uimm2:$imm)), -- (f32 (EXTRACT_SUBREG (XVREPL128VEI_W v8f32:$xj, uimm2:$imm), sub_32))>; --def : Pat<(f64 (vector_extract v4f64:$xj, uimm1:$imm)), -- (f64 (EXTRACT_SUBREG (XVREPL128VEI_D v4f64:$xj, uimm1:$imm), sub_64))>; -- --// Vector extraction with variable index. --def : Pat<(i64 (vector_extract v32i8:$xj, i64:$rk)), -- (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_B v32i8:$xj, -- i64:$rk), -- sub_32)), -- GPR), (i64 24))>; --def : Pat<(i64 (vector_extract v16i16:$xj, i64:$rk)), -- (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_H v16i16:$xj, -- i64:$rk), -- sub_32)), -- GPR), (i64 16))>; --def : Pat<(i64 (vector_extract v8i32:$xj, i64:$rk)), -- (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_W v8i32:$xj, i64:$rk), -- sub_32)), -- GPR)>; --def : Pat<(i64 (vector_extract v4i64:$xj, i64:$rk)), -- (COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (XVREPLVE_D v4i64:$xj, i64:$rk), -- sub_64)), -- GPR)>; --def : Pat<(f32 (vector_extract v8f32:$xj, i64:$rk)), -- (f32 (EXTRACT_SUBREG (XVREPLVE_W v8f32:$xj, i64:$rk), sub_32))>; --def : Pat<(f64 (vector_extract v4f64:$xj, i64:$rk)), -- (f64 (EXTRACT_SUBREG (XVREPLVE_D v4f64:$xj, i64:$rk), sub_64))>; -+def : Pat<(i64 (vector_extract v8i32:$xj, uimm3:$imm)), -+ (XVPICKVE2GR_W v8i32:$xj, uimm3:$imm)>; -+def : Pat<(i64 (vector_extract v4i64:$xj, uimm2:$imm)), -+ (XVPICKVE2GR_D v4i64:$xj, uimm2:$imm)>; -+def : Pat<(f32 (vector_extract v8f32:$xj, uimm3:$imm)), -+ (MOVGR2FR_W (XVPICKVE2GR_W v8f32:$xj, uimm3:$imm))>; -+def : Pat<(f64 (vector_extract v4f64:$xj, uimm2:$imm)), -+ (MOVGR2FR_D (XVPICKVE2GR_D v4f64:$xj, uimm2:$imm))>; - - // vselect - def : Pat<(v32i8 (vselect LASX256:$xj, LASX256:$xd, -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll -index 78f584cd09a8..02b76bf75b75 100644 ---- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll -@@ -31,7 +31,7 @@ define void @extract_8xi32(ptr %src, ptr %dst) nounwind { - ; CHECK-LABEL: extract_8xi32: - ; CHECK: # %bb.0: - ; CHECK-NEXT: xvld $xr0, $a0, 0 --; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1 -+; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1 - ; CHECK-NEXT: st.w $a0, $a1, 0 - ; CHECK-NEXT: ret - %v = load volatile <8 x i32>, ptr %src -@@ -44,7 +44,7 @@ define void @extract_4xi64(ptr %src, ptr %dst) nounwind { - ; 
CHECK-LABEL: extract_4xi64: - ; CHECK: # %bb.0: - ; CHECK-NEXT: xvld $xr0, $a0, 0 --; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1 -+; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1 - ; CHECK-NEXT: st.d $a0, $a1, 0 - ; CHECK-NEXT: ret - %v = load volatile <4 x i64>, ptr %src -@@ -57,8 +57,8 @@ define void @extract_8xfloat(ptr %src, ptr %dst) nounwind { - ; CHECK-LABEL: extract_8xfloat: - ; CHECK: # %bb.0: - ; CHECK-NEXT: xvld $xr0, $a0, 0 --; CHECK-NEXT: ori $a0, $zero, 7 --; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a0 -+; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7 -+; CHECK-NEXT: movgr2fr.w $fa0, $a0 - ; CHECK-NEXT: fst.s $fa0, $a1, 0 - ; CHECK-NEXT: ret - %v = load volatile <8 x float>, ptr %src -@@ -71,8 +71,8 @@ define void @extract_4xdouble(ptr %src, ptr %dst) nounwind { - ; CHECK-LABEL: extract_4xdouble: - ; CHECK: # %bb.0: - ; CHECK-NEXT: xvld $xr0, $a0, 0 --; CHECK-NEXT: ori $a0, $zero, 3 --; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a0 -+; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3 -+; CHECK-NEXT: movgr2fr.d $fa0, $a0 - ; CHECK-NEXT: fst.d $fa0, $a1, 0 - ; CHECK-NEXT: ret - %v = load volatile <4 x double>, ptr %src -@@ -84,12 +84,22 @@ define void @extract_4xdouble(ptr %src, ptr %dst) nounwind { - define void @extract_32xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind { - ; CHECK-LABEL: extract_32xi8_idx: - ; CHECK: # %bb.0: --; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: addi.d $sp, $sp, -64 -+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -+; CHECK-NEXT: addi.d $fp, $sp, 64 -+; CHECK-NEXT: srli.d $a3, $sp, 5 -+; CHECK-NEXT: slli.d $sp, $a3, 5 - ; CHECK-NEXT: xvld $xr0, $a0, 0 --; CHECK-NEXT: xvreplve.b $xr0, $xr0, $a2 --; CHECK-NEXT: movfr2gr.s $a0, $fa0 --; CHECK-NEXT: srai.w $a0, $a0, 24 -+; CHECK-NEXT: xvst $xr0, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a2, 4, 0 -+; CHECK-NEXT: ld.b $a0, $a0, 0 - ; CHECK-NEXT: st.b $a0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $fp, -64 -+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 64 - ; CHECK-NEXT: ret - %v = load volatile <32 x i8>, ptr %src - %e = extractelement <32 x i8> %v, i32 %idx -@@ -100,12 +110,22 @@ define void @extract_32xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind { - define void @extract_16xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind { - ; CHECK-LABEL: extract_16xi16_idx: - ; CHECK: # %bb.0: --; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: addi.d $sp, $sp, -64 -+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -+; CHECK-NEXT: addi.d $fp, $sp, 64 -+; CHECK-NEXT: srli.d $a3, $sp, 5 -+; CHECK-NEXT: slli.d $sp, $a3, 5 - ; CHECK-NEXT: xvld $xr0, $a0, 0 --; CHECK-NEXT: xvreplve.h $xr0, $xr0, $a2 --; CHECK-NEXT: movfr2gr.s $a0, $fa0 --; CHECK-NEXT: srai.w $a0, $a0, 16 -+; CHECK-NEXT: xvst $xr0, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a2, 4, 1 -+; CHECK-NEXT: ld.h $a0, $a0, 0 - ; CHECK-NEXT: st.h $a0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $fp, -64 -+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 64 - ; CHECK-NEXT: ret - %v = load volatile <16 x i16>, ptr %src - %e = extractelement <16 x i16> %v, i32 %idx -@@ -116,11 +136,22 @@ define void @extract_16xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind { - define void @extract_8xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind { - 
; CHECK-LABEL: extract_8xi32_idx: - ; CHECK: # %bb.0: --; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: addi.d $sp, $sp, -64 -+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -+; CHECK-NEXT: addi.d $fp, $sp, 64 -+; CHECK-NEXT: srli.d $a3, $sp, 5 -+; CHECK-NEXT: slli.d $sp, $a3, 5 - ; CHECK-NEXT: xvld $xr0, $a0, 0 --; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a2 --; CHECK-NEXT: movfr2gr.s $a0, $fa0 -+; CHECK-NEXT: xvst $xr0, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2 -+; CHECK-NEXT: ld.w $a0, $a0, 0 - ; CHECK-NEXT: st.w $a0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $fp, -64 -+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 64 - ; CHECK-NEXT: ret - %v = load volatile <8 x i32>, ptr %src - %e = extractelement <8 x i32> %v, i32 %idx -@@ -131,11 +162,22 @@ define void @extract_8xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind { - define void @extract_4xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind { - ; CHECK-LABEL: extract_4xi64_idx: - ; CHECK: # %bb.0: --; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: addi.d $sp, $sp, -64 -+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -+; CHECK-NEXT: addi.d $fp, $sp, 64 -+; CHECK-NEXT: srli.d $a3, $sp, 5 -+; CHECK-NEXT: slli.d $sp, $a3, 5 - ; CHECK-NEXT: xvld $xr0, $a0, 0 --; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a2 --; CHECK-NEXT: movfr2gr.d $a0, $fa0 -+; CHECK-NEXT: xvst $xr0, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3 -+; CHECK-NEXT: ld.d $a0, $a0, 0 - ; CHECK-NEXT: st.d $a0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $fp, -64 -+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 64 - ; CHECK-NEXT: ret - %v = load volatile <4 x i64>, ptr %src - %e = extractelement <4 x i64> %v, i32 %idx -@@ -146,10 +188,22 @@ define void @extract_4xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind { - define void @extract_8xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind { - ; CHECK-LABEL: extract_8xfloat_idx: - ; CHECK: # %bb.0: --; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: addi.d $sp, $sp, -64 -+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -+; CHECK-NEXT: addi.d $fp, $sp, 64 -+; CHECK-NEXT: srli.d $a3, $sp, 5 -+; CHECK-NEXT: slli.d $sp, $a3, 5 - ; CHECK-NEXT: xvld $xr0, $a0, 0 --; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a2 -+; CHECK-NEXT: xvst $xr0, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2 -+; CHECK-NEXT: fld.s $fa0, $a0, 0 - ; CHECK-NEXT: fst.s $fa0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $fp, -64 -+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 64 - ; CHECK-NEXT: ret - %v = load volatile <8 x float>, ptr %src - %e = extractelement <8 x float> %v, i32 %idx -@@ -160,10 +214,22 @@ define void @extract_8xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind { - define void @extract_4xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind { - ; CHECK-LABEL: extract_4xdouble_idx: - ; CHECK: # %bb.0: --; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: addi.d $sp, $sp, -64 -+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -+; 
CHECK-NEXT: addi.d $fp, $sp, 64 -+; CHECK-NEXT: srli.d $a3, $sp, 5 -+; CHECK-NEXT: slli.d $sp, $a3, 5 - ; CHECK-NEXT: xvld $xr0, $a0, 0 --; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a2 -+; CHECK-NEXT: xvst $xr0, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3 -+; CHECK-NEXT: fld.d $fa0, $a0, 0 - ; CHECK-NEXT: fst.d $fa0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $fp, -64 -+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 64 - ; CHECK-NEXT: ret - %v = load volatile <4 x double>, ptr %src - %e = extractelement <4 x double> %v, i32 %idx --- -2.20.1 - - -From 5953c8d6a82ac2ad2438de5dd46525a9f7a084fb Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Wed, 27 Dec 2023 16:31:49 +0800 -Subject: [PATCH 28/35] [LoongArch] Fix incorrect pattern XVREPL128VEI_{W/D} - instructions - -Remove the incorrect patterns for `XVREPL128VEI_{W/D}` instructions, -and add correct patterns for XVREPLVE0_{W/D} instructions - -(cherry picked from commit c7367f985e0d27aeb8bc993406d1b9f4ca307399) ---- - llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td | 6 +++--- - llvm/test/CodeGen/LoongArch/lasx/build-vector.ll | 4 ++-- - 2 files changed, 5 insertions(+), 5 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index 9b7a34688811..059689cef840 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -1571,11 +1571,11 @@ def : Pat<(loongarch_vreplve v8i32:$xj, GRLenVT:$rk), - def : Pat<(loongarch_vreplve v4i64:$xj, GRLenVT:$rk), - (XVREPLVE_D v4i64:$xj, GRLenVT:$rk)>; - --// XVREPL128VEI_{W/D} -+// XVREPLVE0_{W/D} - def : Pat<(lasxsplatf32 FPR32:$fj), -- (XVREPL128VEI_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)>; -+ (XVREPLVE0_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32))>; - def : Pat<(lasxsplatf64 FPR64:$fj), -- (XVREPL128VEI_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)>; -+ (XVREPLVE0_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64))>; - - // Loads/Stores - foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in { -diff --git a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll -index 6824ab5cda8d..ae6f31aaec64 100644 ---- a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll -+++ b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll -@@ -57,7 +57,7 @@ define void @buildvector_v8f32_splat(ptr %dst, float %a0) nounwind { - ; CHECK-LABEL: buildvector_v8f32_splat: - ; CHECK: # %bb.0: # %entry - ; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0 --; CHECK-NEXT: xvrepl128vei.w $xr0, $xr0, 0 -+; CHECK-NEXT: xvreplve0.w $xr0, $xr0 - ; CHECK-NEXT: xvst $xr0, $a0, 0 - ; CHECK-NEXT: ret - entry: -@@ -71,7 +71,7 @@ define void @buildvector_v4f64_splat(ptr %dst, double %a0) nounwind { - ; CHECK-LABEL: buildvector_v4f64_splat: - ; CHECK: # %bb.0: # %entry - ; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 --; CHECK-NEXT: xvrepl128vei.d $xr0, $xr0, 0 -+; CHECK-NEXT: xvreplve0.d $xr0, $xr0 - ; CHECK-NEXT: xvst $xr0, $a0, 0 - ; CHECK-NEXT: ret - entry: --- -2.20.1 - - -From 7e21c962da87491bb438ea3906826875f53f2931 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Thu, 28 Dec 2023 20:56:32 +0800 -Subject: [PATCH 29/35] [LoongArch] Fix incorrect pattern [X]VBITSELI_B - instructions - -Adjusted the operand order of [X]VBITSELI_B to correctly match vselect. 
- -(cherry picked from commit da5378e87e11689d05a58198d6e15e9551916794) ---- - llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td | 4 ++-- - llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td | 4 ++-- - llvm/test/CodeGen/LoongArch/lasx/vselect.ll | 6 +++--- - llvm/test/CodeGen/LoongArch/lsx/vselect.ll | 6 +++--- - 4 files changed, 10 insertions(+), 10 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index 059689cef840..b3c11bc5423d 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -1600,8 +1600,8 @@ def : Pat<(f64 (vector_extract v4f64:$xj, uimm2:$imm)), - (MOVGR2FR_D (XVPICKVE2GR_D v4f64:$xj, uimm2:$imm))>; - - // vselect --def : Pat<(v32i8 (vselect LASX256:$xj, LASX256:$xd, -- (v32i8 (SplatPat_uimm8 uimm8:$imm)))), -+def : Pat<(v32i8 (vselect LASX256:$xd, (v32i8 (SplatPat_uimm8 uimm8:$imm)), -+ LASX256:$xj)), - (XVBITSELI_B LASX256:$xd, LASX256:$xj, uimm8:$imm)>; - foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in - def : Pat<(vt (vselect LASX256:$xa, LASX256:$xk, LASX256:$xj)), -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index e468176885d7..5569c2cd15b5 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -1731,8 +1731,8 @@ def : Pat<(f64 (vector_extract v2f64:$vj, i64:$rk)), - (f64 (EXTRACT_SUBREG (VREPLVE_D v2f64:$vj, i64:$rk), sub_64))>; - - // vselect --def : Pat<(v16i8 (vselect LSX128:$vj, LSX128:$vd, -- (v16i8 (SplatPat_uimm8 uimm8:$imm)))), -+def : Pat<(v16i8 (vselect LSX128:$vd, (v16i8 (SplatPat_uimm8 uimm8:$imm)), -+ LSX128:$vj)), - (VBITSELI_B LSX128:$vd, LSX128:$vj, uimm8:$imm)>; - foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in - def : Pat<(vt (vselect LSX128:$va, LSX128:$vk, LSX128:$vj)), -diff --git a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll -index 24f4bcf752d3..ec2fc28db33c 100644 ---- a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll -+++ b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll -@@ -6,11 +6,11 @@ define void @select_v32i8_imm(ptr %res, ptr %a0) nounwind { - ; CHECK: # %bb.0: - ; CHECK-NEXT: xvld $xr0, $a1, 0 - ; CHECK-NEXT: xvrepli.h $xr1, -256 --; CHECK-NEXT: xvbitseli.b $xr0, $xr1, 1 --; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: xvbitseli.b $xr1, $xr0, 1 -+; CHECK-NEXT: xvst $xr1, $a0, 0 - ; CHECK-NEXT: ret - %v0 = load <32 x i8>, ptr %a0 -- %sel = select <32 x i1> , <32 x i8> %v0, <32 x i8> -+ %sel = select <32 x i1> , <32 x i8> , <32 x i8> %v0 - store <32 x i8> %sel, ptr %res - ret void - } -diff --git a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll -index 00e3d9313f13..746152f0f026 100644 ---- a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll -+++ b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll -@@ -6,11 +6,11 @@ define void @select_v16i8_imm(ptr %res, ptr %a0) nounwind { - ; CHECK: # %bb.0: - ; CHECK-NEXT: vld $vr0, $a1, 0 - ; CHECK-NEXT: vrepli.h $vr1, -256 --; CHECK-NEXT: vbitseli.b $vr0, $vr1, 255 --; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: vbitseli.b $vr1, $vr0, 255 -+; CHECK-NEXT: vst $vr1, $a0, 0 - ; CHECK-NEXT: ret - %v0 = load <16 x i8>, ptr %a0 -- %sel = select <16 x i1> , <16 x i8> %v0, <16 x i8> -+ %sel = select <16 x i1> , <16 x i8> , <16 x i8> %v0 - store <16 x i8> %sel, ptr %res - ret void - } --- -2.20.1 - - -From 
9aab6c004b73d1069444b17a9768310f288b3130 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Tue, 23 Jan 2024 09:06:35 +0800 -Subject: [PATCH 30/35] [LoongArch] Permit auto-vectorization using LSX/LASX - with `auto-vec` feature (#78943) - -With enough codegen complete, we can now correctly report the size of -vector registers for LSX/LASX, allowing auto vectorization (The -`auto-vec` feature needs to be enabled simultaneously). - -As described, the `auto-vec` feature is an experimental one. To ensure -that automatic vectorization is not enabled by default, because the -information provided by the current `TTI` cannot yield additional -benefits for automatic vectorization. - -(cherry picked from commit fcff4582f01db2f5a99e3acf452aec9f2d8a126a) ---- - llvm/lib/Target/LoongArch/LoongArch.td | 4 ++ - .../lib/Target/LoongArch/LoongArchSubtarget.h | 2 + - .../LoongArchTargetTransformInfo.cpp | 18 +++++ - .../LoongArch/LoongArchTargetTransformInfo.h | 2 + - .../LoopVectorize/LoongArch/defaults.ll | 66 +++++++++++++++++++ - .../LoopVectorize/LoongArch/lit.local.cfg | 4 ++ - 6 files changed, 96 insertions(+) - create mode 100644 llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll - create mode 100644 llvm/test/Transforms/LoopVectorize/LoongArch/lit.local.cfg - -diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td -index 75b65fe69f26..2a4c991a43b0 100644 ---- a/llvm/lib/Target/LoongArch/LoongArch.td -+++ b/llvm/lib/Target/LoongArch/LoongArch.td -@@ -105,6 +105,10 @@ def FeatureUAL - def FeatureRelax - : SubtargetFeature<"relax", "HasLinkerRelax", "true", - "Enable Linker relaxation">; -+// Experimental auto vectorization -+def FeatureAutoVec -+ : SubtargetFeature<"auto-vec", "HasExpAutoVec", "true", -+ "Experimental auto vectorization">; - - //===----------------------------------------------------------------------===// - // Registers, instruction descriptions ... -diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h -index 5c173675cca4..174e4cba8326 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h -+++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h -@@ -44,6 +44,7 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo { - bool HasLaLocalWithAbs = false; - bool HasUAL = false; - bool HasLinkerRelax = false; -+ bool HasExpAutoVec = false; - unsigned GRLen = 32; - MVT GRLenVT = MVT::i32; - LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown; -@@ -102,6 +103,7 @@ public: - bool hasLaLocalWithAbs() const { return HasLaLocalWithAbs; } - bool hasUAL() const { return HasUAL; } - bool hasLinkerRelax() const { return HasLinkerRelax; } -+ bool hasExpAutoVec() const { return HasExpAutoVec; } - MVT getGRLenVT() const { return GRLenVT; } - unsigned getGRLen() const { return GRLen; } - LoongArchABI::ABI getTargetABI() const { return TargetABI; } -diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp -index a6de86eea116..04349aa52b54 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp -@@ -19,4 +19,22 @@ using namespace llvm; - - #define DEBUG_TYPE "loongarchtti" - -+TypeSize LoongArchTTIImpl::getRegisterBitWidth( -+ TargetTransformInfo::RegisterKind K) const { -+ switch (K) { -+ case TargetTransformInfo::RGK_Scalar: -+ return TypeSize::getFixed(ST->is64Bit() ? 
64 : 32); -+ case TargetTransformInfo::RGK_FixedWidthVector: -+ if (ST->hasExtLASX() && ST->hasExpAutoVec()) -+ return TypeSize::getFixed(256); -+ if (ST->hasExtLSX() && ST->hasExpAutoVec()) -+ return TypeSize::getFixed(128); -+ return TypeSize::getFixed(0); -+ case TargetTransformInfo::RGK_ScalableVector: -+ return TypeSize::getScalable(0); -+ } -+ -+ llvm_unreachable("Unsupported register kind"); -+} -+ - // TODO: Implement more hooks to provide TTI machinery for LoongArch. -diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h -index 9e02f793ba8a..d296c9ed576f 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h -+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h -@@ -39,6 +39,8 @@ public: - : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), - TLI(ST->getTargetLowering()) {} - -+ TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const; -+ - // TODO: Implement more hooks to provide TTI machinery for LoongArch. - }; - -diff --git a/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll b/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll -new file mode 100644 -index 000000000000..a8ac2411dd82 ---- /dev/null -+++ b/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll -@@ -0,0 +1,66 @@ -+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 -+; RUN: opt < %s -passes=loop-vectorize -mtriple loongarch64-linux-gnu -mattr=+lasx,+auto-vec -S | FileCheck %s -+ -+;; This is a collection of tests whose only purpose is to show changes in the -+;; default configuration. Please keep these tests minimal - if you're testing -+;; functionality of some specific configuration, please place that in a -+;; seperate test file with a hard coded configuration (even if that -+;; configuration is the current default). 
-+ -+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" -+target triple = "loongarch64" -+ -+define void @vector_add(ptr noalias nocapture %a, i64 %v) { -+; CHECK-LABEL: define void @vector_add -+; CHECK-SAME: (ptr noalias nocapture [[A:%.*]], i64 [[V:%.*]]) #[[ATTR0:[0-9]+]] { -+; CHECK-NEXT: entry: -+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -+; CHECK: vector.ph: -+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i64 0 -+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -+; CHECK: vector.body: -+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] -+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 -+; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -+; CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[TMP2]], align 8 -+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -+; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -+; CHECK: middle.block: -+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 -+; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] -+; CHECK: scalar.ph: -+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -+; CHECK-NEXT: br label [[FOR_BODY:%.*]] -+; CHECK: for.body: -+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -+; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -+; CHECK-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V]] -+; CHECK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8 -+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] -+; CHECK: for.end: -+; CHECK-NEXT: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv -+ %elem = load i64, ptr %arrayidx -+ %add = add i64 %elem, %v -+ store i64 %add, ptr %arrayidx -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond.not = icmp eq i64 %iv.next, 1024 -+ br i1 %exitcond.not, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -diff --git a/llvm/test/Transforms/LoopVectorize/LoongArch/lit.local.cfg b/llvm/test/Transforms/LoopVectorize/LoongArch/lit.local.cfg -new file mode 100644 -index 000000000000..9570af17fe5f ---- /dev/null -+++ b/llvm/test/Transforms/LoopVectorize/LoongArch/lit.local.cfg -@@ -0,0 +1,4 @@ -+config.suffixes = [".ll"] -+ -+if not "LoongArch" in config.root.targets: -+ config.unsupported = True --- -2.20.1 - - -From 61b8589c3c71026af28b6a71e0b5d4c41b7c78d1 Mon Sep 17 00:00:00 2001 -From: yjijd -Date: Tue, 23 Jan 2024 15:16:23 +0800 -Subject: [PATCH 31/35] [CodeGen][LoongArch] Set SINT_TO_FP/UINT_TO_FP to legal - for vector types (#78924) - 
-Support the following conversions: -v4i32->v4f32, v2i64->v2f64(LSX) -v8i32->v8f32, v4i64->v4f64(LASX) -v4i32->v4f64, v4i64->v4f32(LASX) - -(cherry picked from commit f799f936929c232a16abc7c520a10fecadbf05f9) ---- - .../LoongArch/LoongArchISelLowering.cpp | 4 ++ - .../LoongArch/LoongArchLASXInstrInfo.td | 22 +++++++ - .../Target/LoongArch/LoongArchLSXInstrInfo.td | 8 +++ - .../LoongArch/lasx/ir-instruction/sitofp.ll | 57 +++++++++++++++++++ - .../LoongArch/lasx/ir-instruction/uitofp.ll | 57 +++++++++++++++++++ - .../LoongArch/lsx/ir-instruction/sitofp.ll | 28 +++++++++ - .../LoongArch/lsx/ir-instruction/uitofp.ll | 28 +++++++++ - 7 files changed, 204 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sitofp.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/uitofp.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sitofp.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/uitofp.ll - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index cf881ce720a6..7a360b42e15d 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -256,6 +256,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, - Expand); - } -+ setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, -+ {MVT::v4i32, MVT::v2i64}, Legal); - for (MVT VT : {MVT::v4f32, MVT::v2f64}) { - setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); - setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); -@@ -298,6 +300,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, - Expand); - } -+ setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, -+ {MVT::v8i32, MVT::v4i32, MVT::v4i64}, Legal); - for (MVT VT : {MVT::v8f32, MVT::v4f64}) { - setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); - setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index b3c11bc5423d..b3e74b480922 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -1611,6 +1611,28 @@ foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in - def : Pat<(fneg (v8f32 LASX256:$xj)), (XVBITREVI_W LASX256:$xj, 31)>; - def : Pat<(fneg (v4f64 LASX256:$xj)), (XVBITREVI_D LASX256:$xj, 63)>; - -+// XVFFINT_{S_W/D_L} -+def : Pat<(v8f32 (sint_to_fp v8i32:$vj)), (XVFFINT_S_W v8i32:$vj)>; -+def : Pat<(v4f64 (sint_to_fp v4i64:$vj)), (XVFFINT_D_L v4i64:$vj)>; -+def : Pat<(v4f64 (sint_to_fp v4i32:$vj)), -+ (XVFFINT_D_L (VEXT2XV_D_W (SUBREG_TO_REG (i64 0), v4i32:$vj, -+ sub_128)))>; -+def : Pat<(v4f32 (sint_to_fp v4i64:$vj)), -+ (EXTRACT_SUBREG (XVFCVT_S_D (XVPERMI_D (XVFFINT_D_L v4i64:$vj), 238), -+ (XVFFINT_D_L v4i64:$vj)), -+ sub_128)>; -+ -+// XVFFINT_{S_WU/D_LU} -+def : Pat<(v8f32 (uint_to_fp v8i32:$vj)), (XVFFINT_S_WU v8i32:$vj)>; -+def : Pat<(v4f64 (uint_to_fp v4i64:$vj)), (XVFFINT_D_LU v4i64:$vj)>; -+def : Pat<(v4f64 (uint_to_fp v4i32:$vj)), -+ (XVFFINT_D_LU (VEXT2XV_DU_WU (SUBREG_TO_REG (i64 0), v4i32:$vj, -+ sub_128)))>; -+def : Pat<(v4f32 (uint_to_fp v4i64:$vj)), -+ (EXTRACT_SUBREG (XVFCVT_S_D (XVPERMI_D (XVFFINT_D_LU v4i64:$vj), 238), -+ (XVFFINT_D_LU v4i64:$vj)), -+ 
sub_128)>; -+ - } // Predicates = [HasExtLASX] - - /// Intrinsic pattern -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index 5569c2cd15b5..63eac4d1aeb7 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -1742,6 +1742,14 @@ foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in - def : Pat<(fneg (v4f32 LSX128:$vj)), (VBITREVI_W LSX128:$vj, 31)>; - def : Pat<(fneg (v2f64 LSX128:$vj)), (VBITREVI_D LSX128:$vj, 63)>; - -+// VFFINT_{S_W/D_L} -+def : Pat<(v4f32 (sint_to_fp v4i32:$vj)), (VFFINT_S_W v4i32:$vj)>; -+def : Pat<(v2f64 (sint_to_fp v2i64:$vj)), (VFFINT_D_L v2i64:$vj)>; -+ -+// VFFINT_{S_WU/D_LU} -+def : Pat<(v4f32 (uint_to_fp v4i32:$vj)), (VFFINT_S_WU v4i32:$vj)>; -+def : Pat<(v2f64 (uint_to_fp v2i64:$vj)), (VFFINT_D_LU v2i64:$vj)>; -+ - } // Predicates = [HasExtLSX] - - /// Intrinsic pattern -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sitofp.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sitofp.ll -new file mode 100644 -index 000000000000..208a758ea4e9 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sitofp.ll -@@ -0,0 +1,57 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @sitofp_v8i32_v8f32(ptr %res, ptr %in){ -+; CHECK-LABEL: sitofp_v8i32_v8f32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvffint.s.w $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %in -+ %v1 = sitofp <8 x i32> %v0 to <8 x float> -+ store <8 x float> %v1, ptr %res -+ ret void -+} -+ -+define void @sitofp_v4f64_v4f64(ptr %res, ptr %in){ -+; CHECK-LABEL: sitofp_v4f64_v4f64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvffint.d.l $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %in -+ %v1 = sitofp <4 x i64> %v0 to <4 x double> -+ store <4 x double> %v1, ptr %res -+ ret void -+} -+ -+define void @sitofp_v4i64_v4f32(ptr %res, ptr %in){ -+; CHECK-LABEL: sitofp_v4i64_v4f32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvffint.d.l $xr0, $xr0 -+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 238 -+; CHECK-NEXT: xvfcvt.s.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %in -+ %v1 = sitofp <4 x i64> %v0 to <4 x float> -+ store <4 x float> %v1, ptr %res -+ ret void -+} -+ -+define void @sitofp_v4i32_v4f64(ptr %res, ptr %in){ -+; CHECK-LABEL: sitofp_v4i32_v4f64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vext2xv.d.w $xr0, $xr0 -+; CHECK-NEXT: xvffint.d.l $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %in -+ %v1 = sitofp <4 x i32> %v0 to <4 x double> -+ store <4 x double> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/uitofp.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/uitofp.ll -new file mode 100644 -index 000000000000..70cf71c4cec2 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/uitofp.ll -@@ -0,0 +1,57 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @uitofp_v8i32_v8f32(ptr %res, ptr %in){ -+; CHECK-LABEL: 
uitofp_v8i32_v8f32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvffint.s.wu $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %in -+ %v1 = uitofp <8 x i32> %v0 to <8 x float> -+ store <8 x float> %v1, ptr %res -+ ret void -+} -+ -+define void @uitofp_v4f64_v4f64(ptr %res, ptr %in){ -+; CHECK-LABEL: uitofp_v4f64_v4f64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvffint.d.lu $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %in -+ %v1 = uitofp <4 x i64> %v0 to <4 x double> -+ store <4 x double> %v1, ptr %res -+ ret void -+} -+ -+define void @uitofp_v4i64_v4f32(ptr %res, ptr %in){ -+; CHECK-LABEL: uitofp_v4i64_v4f32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvffint.d.lu $xr0, $xr0 -+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 238 -+; CHECK-NEXT: xvfcvt.s.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %in -+ %v1 = uitofp <4 x i64> %v0 to <4 x float> -+ store <4 x float> %v1, ptr %res -+ ret void -+} -+ -+define void @uitofp_v4i32_v4f64(ptr %res, ptr %in){ -+; CHECK-LABEL: uitofp_v4i32_v4f64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vext2xv.du.wu $xr0, $xr0 -+; CHECK-NEXT: xvffint.d.lu $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %in -+ %v1 = uitofp <4 x i32> %v0 to <4 x double> -+ store <4 x double> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sitofp.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sitofp.ll -new file mode 100644 -index 000000000000..1e820a37a240 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sitofp.ll -@@ -0,0 +1,28 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @sitofp_v4i32_v4f32(ptr %res, ptr %in){ -+; CHECK-LABEL: sitofp_v4i32_v4f32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vffint.s.w $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %in -+ %v1 = sitofp <4 x i32> %v0 to <4 x float> -+ store <4 x float> %v1, ptr %res -+ ret void -+} -+ -+define void @sitofp_v2i64_v2f64(ptr %res, ptr %in){ -+; CHECK-LABEL: sitofp_v2i64_v2f64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vffint.d.l $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %in -+ %v1 = sitofp <2 x i64> %v0 to <2 x double> -+ store <2 x double> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/uitofp.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/uitofp.ll -new file mode 100644 -index 000000000000..3d4913f12e57 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/uitofp.ll -@@ -0,0 +1,28 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @uitofp_v4i32_v4f32(ptr %res, ptr %in){ -+; CHECK-LABEL: uitofp_v4i32_v4f32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vffint.s.wu $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %in -+ %v1 = uitofp <4 x i32> %v0 to <4 x float> -+ store <4 x float> %v1, ptr %res -+ ret void -+} -+ 
-+define void @uitofp_v2i64_v2f64(ptr %res, ptr %in){ -+; CHECK-LABEL: uitofp_v2i64_v2f64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vffint.d.lu $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %in -+ %v1 = uitofp <2 x i64> %v0 to <2 x double> -+ store <2 x double> %v1, ptr %res -+ ret void -+} --- -2.20.1 - - -From 0bf1418c5f46ca74dfc8903757b3bb14e0760633 Mon Sep 17 00:00:00 2001 -From: yjijd -Date: Tue, 23 Jan 2024 15:57:06 +0800 -Subject: [PATCH 32/35] [CodeGen][LoongArch] Set FP_TO_SINT/FP_TO_UINT to legal - for vector types (#79107) - -Support the following conversions: -v4f32->v4i32, v2f64->v2i64(LSX) -v8f32->v8i32, v4f64->v4i64(LASX) -v4f32->v4i64, v4f64->v4i32(LASX) - -(cherry picked from commit 44ba6ebc999d6e9b27bedfe04a993adfd204dc6a) ---- - .../LoongArch/LoongArchISelLowering.cpp | 12 ++-- - .../LoongArch/LoongArchLASXInstrInfo.td | 22 +++++++ - .../Target/LoongArch/LoongArchLSXInstrInfo.td | 8 +++ - .../LoongArch/lasx/ir-instruction/fptosi.ll | 57 +++++++++++++++++++ - .../LoongArch/lasx/ir-instruction/fptoui.ll | 57 +++++++++++++++++++ - .../LoongArch/lsx/ir-instruction/fptosi.ll | 28 +++++++++ - .../LoongArch/lsx/ir-instruction/fptoui.ll | 28 +++++++++ - 7 files changed, 208 insertions(+), 4 deletions(-) - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptosi.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptoui.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptosi.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptoui.ll - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index 7a360b42e15d..f7eacd56c542 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -256,8 +256,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, - Expand); - } -- setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, -- {MVT::v4i32, MVT::v2i64}, Legal); -+ for (MVT VT : {MVT::v4i32, MVT::v2i64}) { -+ setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal); -+ setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal); -+ } - for (MVT VT : {MVT::v4f32, MVT::v2f64}) { - setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); - setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); -@@ -300,8 +302,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, - Expand); - } -- setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, -- {MVT::v8i32, MVT::v4i32, MVT::v4i64}, Legal); -+ for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) { -+ setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal); -+ setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal); -+ } - for (MVT VT : {MVT::v8f32, MVT::v4f64}) { - setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); - setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index b3e74b480922..492b62da6ce7 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -1633,6 +1633,28 @@ def : Pat<(v4f32 (uint_to_fp v4i64:$vj)), - (XVFFINT_D_LU v4i64:$vj)), - 
sub_128)>; - -+// XVFTINTRZ_{W_S/L_D} -+def : Pat<(v8i32 (fp_to_sint v8f32:$vj)), (XVFTINTRZ_W_S v8f32:$vj)>; -+def : Pat<(v4i64 (fp_to_sint v4f64:$vj)), (XVFTINTRZ_L_D v4f64:$vj)>; -+def : Pat<(v4i64 (fp_to_sint v4f32:$vj)), -+ (VEXT2XV_D_W (SUBREG_TO_REG (i64 0), (VFTINTRZ_W_S v4f32:$vj), -+ sub_128))>; -+def : Pat<(v4i32 (fp_to_sint (v4f64 LASX256:$vj))), -+ (EXTRACT_SUBREG (XVFTINTRZ_W_S (XVFCVT_S_D (XVPERMI_D v4f64:$vj, 238), -+ v4f64:$vj)), -+ sub_128)>; -+ -+// XVFTINTRZ_{W_SU/L_DU} -+def : Pat<(v8i32 (fp_to_uint v8f32:$vj)), (XVFTINTRZ_WU_S v8f32:$vj)>; -+def : Pat<(v4i64 (fp_to_uint v4f64:$vj)), (XVFTINTRZ_LU_D v4f64:$vj)>; -+def : Pat<(v4i64 (fp_to_uint v4f32:$vj)), -+ (VEXT2XV_DU_WU (SUBREG_TO_REG (i64 0), (VFTINTRZ_WU_S v4f32:$vj), -+ sub_128))>; -+def : Pat<(v4i32 (fp_to_uint (v4f64 LASX256:$vj))), -+ (EXTRACT_SUBREG (XVFTINTRZ_W_S (XVFCVT_S_D (XVPERMI_D v4f64:$vj, 238), -+ v4f64:$vj)), -+ sub_128)>; -+ - } // Predicates = [HasExtLASX] - - /// Intrinsic pattern -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index 63eac4d1aeb7..99ac2f3c162f 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -1750,6 +1750,14 @@ def : Pat<(v2f64 (sint_to_fp v2i64:$vj)), (VFFINT_D_L v2i64:$vj)>; - def : Pat<(v4f32 (uint_to_fp v4i32:$vj)), (VFFINT_S_WU v4i32:$vj)>; - def : Pat<(v2f64 (uint_to_fp v2i64:$vj)), (VFFINT_D_LU v2i64:$vj)>; - -+// VFTINTRZ_{W_S/L_D} -+def : Pat<(v4i32 (fp_to_sint v4f32:$vj)), (VFTINTRZ_W_S v4f32:$vj)>; -+def : Pat<(v2i64 (fp_to_sint v2f64:$vj)), (VFTINTRZ_L_D v2f64:$vj)>; -+ -+// VFTINTRZ_{W_SU/L_DU} -+def : Pat<(v4i32 (fp_to_uint v4f32:$vj)), (VFTINTRZ_WU_S v4f32:$vj)>; -+def : Pat<(v2i64 (fp_to_uint v2f64:$vj)), (VFTINTRZ_LU_D v2f64:$vj)>; -+ - } // Predicates = [HasExtLSX] - - /// Intrinsic pattern -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptosi.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptosi.ll -new file mode 100644 -index 000000000000..0d9f57b57ffa ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptosi.ll -@@ -0,0 +1,57 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @fptosi_v8f32_v8i32(ptr %res, ptr %in){ -+; CHECK-LABEL: fptosi_v8f32_v8i32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvftintrz.w.s $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %in -+ %v1 = fptosi <8 x float> %v0 to <8 x i32> -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @fptosi_v4f64_v4i64(ptr %res, ptr %in){ -+; CHECK-LABEL: fptosi_v4f64_v4i64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvftintrz.l.d $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %in -+ %v1 = fptosi <4 x double> %v0 to <4 x i64> -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -+ -+define void @fptosi_v4f64_v4i32(ptr %res, ptr %in){ -+; CHECK-LABEL: fptosi_v4f64_v4i32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 238 -+; CHECK-NEXT: xvfcvt.s.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvftintrz.w.s $xr0, $xr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %in -+ %v1 = fptosi <4 x double> %v0 to <4 x i32> -+ store <4 x i32> %v1, ptr %res -+ ret void 
-+} -+ -+define void @fptosi_v4f32_v4i64(ptr %res, ptr %in){ -+; CHECK-LABEL: fptosi_v4f32_v4i64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vftintrz.w.s $vr0, $vr0 -+; CHECK-NEXT: vext2xv.d.w $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %in -+ %v1 = fptosi <4 x float> %v0 to <4 x i64> -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptoui.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptoui.ll -new file mode 100644 -index 000000000000..27d70f33cd34 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptoui.ll -@@ -0,0 +1,57 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @fptoui_v8f32_v8i32(ptr %res, ptr %in){ -+; CHECK-LABEL: fptoui_v8f32_v8i32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvftintrz.wu.s $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %in -+ %v1 = fptoui <8 x float> %v0 to <8 x i32> -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @fptoui_v4f64_v4i64(ptr %res, ptr %in){ -+; CHECK-LABEL: fptoui_v4f64_v4i64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvftintrz.lu.d $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %in -+ %v1 = fptoui <4 x double> %v0 to <4 x i64> -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -+ -+define void @fptoui_v4f64_v4i32(ptr %res, ptr %in){ -+; CHECK-LABEL: fptoui_v4f64_v4i32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 238 -+; CHECK-NEXT: xvfcvt.s.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvftintrz.w.s $xr0, $xr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %in -+ %v1 = fptoui <4 x double> %v0 to <4 x i32> -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @fptoui_v4f32_v4i64(ptr %res, ptr %in){ -+; CHECK-LABEL: fptoui_v4f32_v4i64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vftintrz.wu.s $vr0, $vr0 -+; CHECK-NEXT: vext2xv.du.wu $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %in -+ %v1 = fptoui <4 x float> %v0 to <4 x i64> -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptosi.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptosi.ll -new file mode 100644 -index 000000000000..c3008fe96e47 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptosi.ll -@@ -0,0 +1,28 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @fptosi_v4f32_v4i32(ptr %res, ptr %in){ -+; CHECK-LABEL: fptosi_v4f32_v4i32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vftintrz.w.s $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %in -+ %v1 = fptosi <4 x float> %v0 to <4 x i32> -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @fptosi_v2f64_v2i64(ptr %res, ptr %in){ -+; CHECK-LABEL: fptosi_v2f64_v2i64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vftintrz.l.d $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ 
%v0 = load <2 x double>, ptr %in -+ %v1 = fptosi <2 x double> %v0 to <2 x i64> -+ store <2 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptoui.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptoui.ll -new file mode 100644 -index 000000000000..f0aeb0bd14e7 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptoui.ll -@@ -0,0 +1,28 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @fptoui_v4f32_v4i32(ptr %res, ptr %in){ -+; CHECK-LABEL: fptoui_v4f32_v4i32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vftintrz.wu.s $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %in -+ %v1 = fptoui <4 x float> %v0 to <4 x i32> -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @fptoui_v2f64_v2i64(ptr %res, ptr %in){ -+; CHECK-LABEL: fptoui_v2f64_v2i64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vftintrz.lu.d $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %in -+ %v1 = fptoui <2 x double> %v0 to <2 x i64> -+ store <2 x i64> %v1, ptr %res -+ ret void -+} --- -2.20.1 - - -From 66224dcebf8b0cc0d32fa5c73fbb4bca0d885a7d Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Fri, 26 Jan 2024 10:24:07 +0800 -Subject: [PATCH 33/35] [LoongArch] Fixing the incorrect return value of - LoongArchTTIImpl::getRegisterBitWidth (#79441) - -When we do not enable vector features, we should return the default -value (`TargetTransformInfoImplBase::getRegisterBitWidth`) instead of -zero. - -This should fix the LoongArch [buildbot -breakage](https://lab.llvm.org/staging/#/builders/5/builds/486) from - -(cherry picked from commit 1e9924c1f248bbddcb95d82a59708d617297dad3) -(cherry picked from commit 900e7cbfdee09c94d022e4dae923b3c7827f95e3) ---- - .../Target/LoongArch/LoongArchTargetTransformInfo.cpp | 11 +++++++---- - 1 file changed, 7 insertions(+), 4 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp -index 04349aa52b54..d47dded9ea6e 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp -@@ -21,17 +21,20 @@ using namespace llvm; - - TypeSize LoongArchTTIImpl::getRegisterBitWidth( - TargetTransformInfo::RegisterKind K) const { -+ TypeSize DefSize = TargetTransformInfoImplBase::getRegisterBitWidth(K); - switch (K) { - case TargetTransformInfo::RGK_Scalar: - return TypeSize::getFixed(ST->is64Bit() ? 64 : 32); - case TargetTransformInfo::RGK_FixedWidthVector: -- if (ST->hasExtLASX() && ST->hasExpAutoVec()) -+ if (!ST->hasExpAutoVec()) -+ return DefSize; -+ if (ST->hasExtLASX()) - return TypeSize::getFixed(256); -- if (ST->hasExtLSX() && ST->hasExpAutoVec()) -+ if (ST->hasExtLSX()) - return TypeSize::getFixed(128); -- return TypeSize::getFixed(0); -+ [[fallthrough]]; - case TargetTransformInfo::RGK_ScalableVector: -- return TypeSize::getScalable(0); -+ return DefSize; - } - - llvm_unreachable("Unsupported register kind"); --- -2.20.1 - - -From fe278490b48572e5f5581f35d6b4195f35693b8c Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Tue, 9 Jan 2024 20:32:20 +0800 -Subject: [PATCH 34/35] [LoongArch] Pre-commit test for #76913. NFC - -This test will crash with expensive check. 
- -Crash message: -``` -*** Bad machine code: Using an undefined physical register *** -- function: main -- basic block: %bb.0 entry (0x20fee70) -- instruction: $r3 = frame-destroy ADDI_D $r22, -288 -- operand 1: $r22 -``` - -(cherry picked from commit f499472de3e1184b83fc6cd78bc244a55f2cac7d) ---- - .../LoongArch/can-not-realign-stack.ll | 39 +++++++++++++++++++ - 1 file changed, 39 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll - -diff --git a/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll b/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll -new file mode 100644 -index 000000000000..526821076498 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll -@@ -0,0 +1,39 @@ -+; REQUIRES: expensive_checks -+; RUN: llc --mtriple=loongarch64 --frame-pointer=none --mattr=+lasx < %s -+ -+; XFAIL: * -+ -+;; FIXME: This test will crash with expensive check. The subsequent patch will -+;; address and fix this issue. -+ -+%struct.S = type { [64 x i16] } -+ -+define dso_local noundef signext i32 @main() nounwind { -+entry: -+ %s = alloca %struct.S, align 2 -+ call void @llvm.lifetime.start.p0(i64 128, ptr nonnull %s) -+ store <16 x i16> , ptr %s, align 2 -+ %0 = getelementptr inbounds [64 x i16], ptr %s, i64 0, i64 16 -+ store <16 x i16> , ptr %0, align 2 -+ %1 = getelementptr inbounds [64 x i16], ptr %s, i64 0, i64 32 -+ store <16 x i16> , ptr %1, align 2 -+ %2 = getelementptr inbounds [64 x i16], ptr %s, i64 0, i64 48 -+ store <16 x i16> , ptr %2, align 2 -+ call void @foo(ptr noundef nonnull %s) -+ store <16 x i16> , ptr %s, align 2 -+ %3 = getelementptr inbounds [64 x i16], ptr %s, i64 0, i64 16 -+ store <16 x i16> , ptr %3, align 2 -+ %4 = getelementptr inbounds [64 x i16], ptr %s, i64 0, i64 32 -+ store <16 x i16> , ptr %4, align 2 -+ %5 = getelementptr inbounds [64 x i16], ptr %s, i64 0, i64 48 -+ store <16 x i16> , ptr %5, align 2 -+ call void @bar(ptr noundef nonnull %s) -+ call void @llvm.lifetime.end.p0(i64 128, ptr nonnull %s) -+ ret i32 0 -+} -+ -+declare void @foo(ptr nocapture noundef) -+declare void @bar(ptr nocapture noundef) -+ -+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) -+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) --- -2.20.1 - - -From e3e2d0c2cb7cfaffe2663f5f8607dad09fcdf3a5 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Tue, 9 Jan 2024 20:35:49 +0800 -Subject: [PATCH 35/35] [LoongArch] Implement - LoongArchRegisterInfo::canRealignStack() (#76913) - -This patch fixes the crash issue in the test: -CodeGen/LoongArch/can-not-realign-stack.ll - -Register allocator may spill virtual registers to the stack, which -introduces stack alignment requirements (when the size of spilled - registers exceeds the default alignment size of the stack). If a -function does not have stack alignment requirements before register -allocation, registers used for stack alignment will not be preserved. - -Therefore, we should implement `canRealignStack()` to inform the -register allocator whether it is allowed to perform stack realignment -operations. 
- -(cherry picked from commit 98c6aa72299caeff6b188e1ff2fc1b39c5b893b6) ---- - .../LoongArch/LoongArchRegisterInfo.cpp | 23 ++++++++ - .../Target/LoongArch/LoongArchRegisterInfo.h | 1 + - .../LoongArch/can-not-realign-stack.ll | 56 +++++++++++++++++-- - 3 files changed, 75 insertions(+), 5 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp -index 257b947a3ce4..092b5f1fb442 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp -@@ -15,6 +15,7 @@ - #include "LoongArch.h" - #include "LoongArchInstrInfo.h" - #include "LoongArchSubtarget.h" -+#include "MCTargetDesc/LoongArchBaseInfo.h" - #include "MCTargetDesc/LoongArchMCTargetDesc.h" - #include "llvm/CodeGen/MachineFrameInfo.h" - #include "llvm/CodeGen/MachineFunction.h" -@@ -194,3 +195,25 @@ bool LoongArchRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed()); - return false; - } -+ -+bool LoongArchRegisterInfo::canRealignStack(const MachineFunction &MF) const { -+ if (!TargetRegisterInfo::canRealignStack(MF)) -+ return false; -+ -+ const MachineRegisterInfo *MRI = &MF.getRegInfo(); -+ const LoongArchFrameLowering *TFI = getFrameLowering(MF); -+ -+ // Stack realignment requires a frame pointer. If we already started -+ // register allocation with frame pointer elimination, it is too late now. -+ if (!MRI->canReserveReg(LoongArch::R22)) -+ return false; -+ -+ // We may also need a base pointer if there are dynamic allocas or stack -+ // pointer adjustments around calls. -+ if (TFI->hasReservedCallFrame(MF)) -+ return true; -+ -+ // A base pointer is required and allowed. Check that it isn't too late to -+ // reserve it. -+ return MRI->canReserveReg(LoongArchABI::getBPReg()); -+} -diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h -index 7e8f26b14097..d1e40254c297 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h -+++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h -@@ -51,6 +51,7 @@ struct LoongArchRegisterInfo : public LoongArchGenRegisterInfo { - bool requiresFrameIndexScavenging(const MachineFunction &MF) const override { - return true; - } -+ bool canRealignStack(const MachineFunction &MF) const override; - }; - } // end namespace llvm - -diff --git a/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll b/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll -index 526821076498..af24ae64b7c7 100644 ---- a/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll -+++ b/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll -@@ -1,14 +1,60 @@ --; REQUIRES: expensive_checks --; RUN: llc --mtriple=loongarch64 --frame-pointer=none --mattr=+lasx < %s -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --frame-pointer=none --mattr=+lasx < %s | FileCheck %s - --; XFAIL: * -+;; This test is checking that when a function allows stack realignment and -+;; realignment needs were not detected before register allocation (at this -+;; point, fp is not preserved), but realignment is required during register -+;; allocation, the stack should not undergo realignment. - --;; FIXME: This test will crash with expensive check. The subsequent patch will --;; address and fix this issue. -+;; Ensure that the `bstrins.d $sp, $zero, n, 0` instruction is not generated. 
-+;; n = log2(realign_size) - 1 - - %struct.S = type { [64 x i16] } - - define dso_local noundef signext i32 @main() nounwind { -+; CHECK-LABEL: main: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: addi.d $sp, $sp, -272 -+; CHECK-NEXT: st.d $ra, $sp, 264 # 8-byte Folded Spill -+; CHECK-NEXT: st.d $fp, $sp, 256 # 8-byte Folded Spill -+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0) -+; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI0_0) -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvst $xr0, $sp, 96 # 32-byte Folded Spill -+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_1) -+; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI0_1) -+; CHECK-NEXT: xvld $xr1, $a0, 0 -+; CHECK-NEXT: xvst $xr1, $sp, 64 # 32-byte Folded Spill -+; CHECK-NEXT: xvst $xr1, $sp, 224 -+; CHECK-NEXT: xvst $xr0, $sp, 192 -+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_2) -+; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI0_2) -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill -+; CHECK-NEXT: xvst $xr0, $sp, 160 -+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_3) -+; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI0_3) -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvst $xr0, $sp, 0 # 32-byte Folded Spill -+; CHECK-NEXT: xvst $xr0, $sp, 128 -+; CHECK-NEXT: addi.d $fp, $sp, 128 -+; CHECK-NEXT: move $a0, $fp -+; CHECK-NEXT: bl %plt(foo) -+; CHECK-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload -+; CHECK-NEXT: xvst $xr0, $sp, 224 -+; CHECK-NEXT: xvld $xr0, $sp, 96 # 32-byte Folded Reload -+; CHECK-NEXT: xvst $xr0, $sp, 192 -+; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload -+; CHECK-NEXT: xvst $xr0, $sp, 160 -+; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload -+; CHECK-NEXT: xvst $xr0, $sp, 128 -+; CHECK-NEXT: move $a0, $fp -+; CHECK-NEXT: bl %plt(bar) -+; CHECK-NEXT: move $a0, $zero -+; CHECK-NEXT: ld.d $fp, $sp, 256 # 8-byte Folded Reload -+; CHECK-NEXT: ld.d $ra, $sp, 264 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 272 -+; CHECK-NEXT: ret - entry: - %s = alloca %struct.S, align 2 - call void @llvm.lifetime.start.p0(i64 128, ptr nonnull %s) --- -2.20.1 - diff --git a/0011-LoongArch-Fix-ABI-mismatch-with-gcc-g-about-empty-st.patch b/0011-LoongArch-Fix-ABI-mismatch-with-gcc-g-about-empty-st.patch new file mode 100644 index 0000000..9f9b7f4 --- /dev/null +++ b/0011-LoongArch-Fix-ABI-mismatch-with-gcc-g-about-empty-st.patch @@ -0,0 +1,85 @@ +From 98951b297f1a6635e9533d677fbdff4496d5bb8e Mon Sep 17 00:00:00 2001 +From: Lu Weining +Date: Tue, 31 Oct 2023 21:18:06 +0800 +Subject: [PATCH 11/27] [LoongArch] Fix ABI mismatch with gcc/g++ about empty + structs passing (#70320) + +How empty structs (not as fields of container struct) are passed in C++ +is not explicitly documented in psABI. However, this patch fixes the +mismatch with g++. + +Note that the unnamed bitfield case `struct { int : 1; }` in C is also +fixed. Previously clang regards it as an empty struct and then ignores +it when passing. Now size of the struct is counted; since it's size is +not 0, clang will not ignore it even in C. + +While https://reviews.llvm.org/D156116 fixed the handling of empty +struct when considering eligibility of the container struct for the FP +calling convention ('flattening'), this patch fixes the handling of +passing the empty struct itself. 
+ +Fix https://github.com/llvm/llvm-project/issues/70319 + +(cherry picked from commit 9ca6bf3fb7b7df373723b3275730f101f9ff816b) +--- + clang/lib/CodeGen/Targets/LoongArch.cpp | 10 ++++++---- + clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c | 8 ++++---- + 2 files changed, 10 insertions(+), 8 deletions(-) + +diff --git a/clang/lib/CodeGen/Targets/LoongArch.cpp b/clang/lib/CodeGen/Targets/LoongArch.cpp +index 7483bf6d6d1e..bc508a99da9c 100644 +--- a/clang/lib/CodeGen/Targets/LoongArch.cpp ++++ b/clang/lib/CodeGen/Targets/LoongArch.cpp +@@ -308,12 +308,14 @@ ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, + CGCXXABI::RAA_DirectInMemory); + } + +- // Ignore empty structs/unions. +- if (isEmptyRecord(getContext(), Ty, true)) +- return ABIArgInfo::getIgnore(); +- + uint64_t Size = getContext().getTypeSize(Ty); + ++ // Ignore empty struct or union whose size is zero, e.g. `struct { }` in C or ++ // `struct { int a[0]; }` in C++. In C++, `struct { }` is empty but it's size ++ // is 1 byte and g++ doesn't ignore it; clang++ matches this behaviour. ++ if (isEmptyRecord(getContext(), Ty, true) && Size == 0) ++ return ABIArgInfo::getIgnore(); ++ + // Pass floating point values via FARs if possible. + if (IsFixed && Ty->isFloatingType() && !Ty->isComplexType() && + FRLen >= Size && FARsLeft) { +diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c +index d0daafac336e..281b7b15841a 100644 +--- a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c ++++ b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c +@@ -93,7 +93,7 @@ struct s9 test_s9(struct s9 a) { + } + + // CHECK-C: define{{.*}} void @test_s10() +-// CHECK-CXX: define{{.*}} void @_Z8test_s103s10() ++// CHECK-CXX: define{{.*}} i64 @_Z8test_s103s10(i64 {{.*}}) + struct s10 { }; + struct s10 test_s10(struct s10 a) { + return a; +@@ -128,14 +128,14 @@ struct s14 test_s14(struct s14 a) { + } + + // CHECK-C: define{{.*}} void @test_s15() +-// CHECK-CXX: define{{.*}} void @_Z8test_s153s15() ++// CHECK-CXX: define{{.*}} i64 @_Z8test_s153s15(i64 {{.*}}) + struct s15 { int : 0; }; + struct s15 test_s15(struct s15 a) { + return a; + } + +-// CHECK-C: define{{.*}} void @test_s16() +-// CHECK-CXX: define{{.*}} void @_Z8test_s163s16() ++// CHECK-C: define{{.*}} i64 @test_s16(i64 {{.*}}) ++// CHECK-CXX: define{{.*}} i64 @_Z8test_s163s16(i64 {{.*}}) + struct s16 { int : 1; }; + struct s16 test_s16(struct s16 a) { + return a; +-- +2.20.1 + diff --git a/0011-LoongArch-Fix-typos.-NFC.patch b/0011-LoongArch-Fix-typos.-NFC.patch new file mode 100644 index 0000000..0a1ed3b --- /dev/null +++ b/0011-LoongArch-Fix-typos.-NFC.patch @@ -0,0 +1,300 @@ +From 7768b478443d9706b7ac3a0897471ef0951ad3d6 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Tue, 29 Aug 2023 19:16:20 +0800 +Subject: [PATCH 11/42] [LoongArch] Fix typos. 
NFC + +(cherry picked from commit 30b6b27385f8ddc550df54a097434a121ae56d12) + +--- + .../LoongArch/LoongArchLASXInstrInfo.td | 52 +++++++++---------- + .../Target/LoongArch/LoongArchLSXInstrInfo.td | 50 +++++++++--------- + 2 files changed, 51 insertions(+), 51 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index a3afd4789dfc..947950be2b8f 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -1545,10 +1545,10 @@ foreach Inst = ["XVEXTH_Q_D", "XVEXTH_QU_DU", "XVMSKLTZ_D", + // Pat<(Intrinsic timm:$imm) + // (LAInst timm:$imm)>; + def : Pat<(int_loongarch_lasx_xvldi timm:$imm), +- (XVLDI (to_valide_timm timm:$imm))>; ++ (XVLDI (to_valid_timm timm:$imm))>; + foreach Inst = ["XVREPLI_B", "XVREPLI_H", "XVREPLI_W", "XVREPLI_D"] in + def : Pat<(deriveLASXIntrinsic.ret timm:$imm), +- (!cast("Pseudo"#Inst) (to_valide_timm timm:$imm))>; ++ (!cast("Pseudo"#Inst) (to_valid_timm timm:$imm))>; + + // vty: v32i8/v16i16/v8i32/v4i64 + // Pat<(Intrinsic vty:$xj, timm:$imm) +@@ -1558,25 +1558,25 @@ foreach Inst = ["XVSAT_B", "XVSAT_BU", "XVNORI_B", "XVROTRI_B", "XVSLLWIL_H_B", + "XVSEQI_B", "XVSLEI_B", "XVSLEI_BU", "XVSLTI_B", "XVSLTI_BU", + "XVREPL128VEI_B", "XVBSLL_V", "XVBSRL_V", "XVSHUF4I_B"] in + def : Pat<(deriveLASXIntrinsic.ret (v32i8 LASX256:$xj), timm:$imm), +- (!cast(Inst) LASX256:$xj, (to_valide_timm timm:$imm))>; ++ (!cast(Inst) LASX256:$xj, (to_valid_timm timm:$imm))>; + foreach Inst = ["XVSAT_H", "XVSAT_HU", "XVROTRI_H", "XVSLLWIL_W_H", + "XVSLLWIL_WU_HU", "XVSRLRI_H", "XVSRARI_H", + "XVSEQI_H", "XVSLEI_H", "XVSLEI_HU", "XVSLTI_H", "XVSLTI_HU", + "XVREPL128VEI_H", "XVSHUF4I_H"] in + def : Pat<(deriveLASXIntrinsic.ret (v16i16 LASX256:$xj), timm:$imm), +- (!cast(Inst) LASX256:$xj, (to_valide_timm timm:$imm))>; ++ (!cast(Inst) LASX256:$xj, (to_valid_timm timm:$imm))>; + foreach Inst = ["XVSAT_W", "XVSAT_WU", "XVROTRI_W", "XVSLLWIL_D_W", + "XVSLLWIL_DU_WU", "XVSRLRI_W", "XVSRARI_W", + "XVSEQI_W", "XVSLEI_W", "XVSLEI_WU", "XVSLTI_W", "XVSLTI_WU", + "XVREPL128VEI_W", "XVSHUF4I_W", "XVPICKVE_W"] in + def : Pat<(deriveLASXIntrinsic.ret (v8i32 LASX256:$xj), timm:$imm), +- (!cast(Inst) LASX256:$xj, (to_valide_timm timm:$imm))>; ++ (!cast(Inst) LASX256:$xj, (to_valid_timm timm:$imm))>; + foreach Inst = ["XVSAT_D", "XVSAT_DU", "XVROTRI_D", "XVSRLRI_D", "XVSRARI_D", + "XVSEQI_D", "XVSLEI_D", "XVSLEI_DU", "XVSLTI_D", "XVSLTI_DU", + "XVPICKVE2GR_D", "XVPICKVE2GR_DU", + "XVREPL128VEI_D", "XVPERMI_D", "XVPICKVE_D"] in + def : Pat<(deriveLASXIntrinsic.ret (v4i64 LASX256:$xj), timm:$imm), +- (!cast(Inst) LASX256:$xj, (to_valide_timm timm:$imm))>; ++ (!cast(Inst) LASX256:$xj, (to_valid_timm timm:$imm))>; + + // vty: v32i8/v16i16/v8i32/v4i64 + // Pat<(Intrinsic vty:$xd, vty:$xj, timm:$imm) +@@ -1588,7 +1588,7 @@ foreach Inst = ["XVSRLNI_B_H", "XVSRANI_B_H", "XVSRLRNI_B_H", "XVSRARNI_B_H", + def : Pat<(deriveLASXIntrinsic.ret + (v32i8 LASX256:$xd), (v32i8 LASX256:$xj), timm:$imm), + (!cast(Inst) LASX256:$xd, LASX256:$xj, +- (to_valide_timm timm:$imm))>; ++ (to_valid_timm timm:$imm))>; + foreach Inst = ["XVSRLNI_H_W", "XVSRANI_H_W", "XVSRLRNI_H_W", "XVSRARNI_H_W", + "XVSSRLNI_H_W", "XVSSRANI_H_W", "XVSSRLNI_HU_W", "XVSSRANI_HU_W", + "XVSSRLRNI_H_W", "XVSSRARNI_H_W", "XVSSRLRNI_HU_W", "XVSSRARNI_HU_W", +@@ -1596,7 +1596,7 @@ foreach Inst = ["XVSRLNI_H_W", "XVSRANI_H_W", "XVSRLRNI_H_W", "XVSRARNI_H_W", + def : Pat<(deriveLASXIntrinsic.ret + (v16i16 
LASX256:$xd), (v16i16 LASX256:$xj), timm:$imm), + (!cast(Inst) LASX256:$xd, LASX256:$xj, +- (to_valide_timm timm:$imm))>; ++ (to_valid_timm timm:$imm))>; + foreach Inst = ["XVSRLNI_W_D", "XVSRANI_W_D", "XVSRLRNI_W_D", "XVSRARNI_W_D", + "XVSSRLNI_W_D", "XVSSRANI_W_D", "XVSSRLNI_WU_D", "XVSSRANI_WU_D", + "XVSSRLRNI_W_D", "XVSSRARNI_W_D", "XVSSRLRNI_WU_D", "XVSSRARNI_WU_D", +@@ -1604,7 +1604,7 @@ foreach Inst = ["XVSRLNI_W_D", "XVSRANI_W_D", "XVSRLRNI_W_D", "XVSRARNI_W_D", + def : Pat<(deriveLASXIntrinsic.ret + (v8i32 LASX256:$xd), (v8i32 LASX256:$xj), timm:$imm), + (!cast(Inst) LASX256:$xd, LASX256:$xj, +- (to_valide_timm timm:$imm))>; ++ (to_valid_timm timm:$imm))>; + foreach Inst = ["XVSRLNI_D_Q", "XVSRANI_D_Q", "XVSRLRNI_D_Q", "XVSRARNI_D_Q", + "XVSSRLNI_D_Q", "XVSSRANI_D_Q", "XVSSRLNI_DU_Q", "XVSSRANI_DU_Q", + "XVSSRLRNI_D_Q", "XVSSRARNI_D_Q", "XVSSRLRNI_DU_Q", "XVSSRARNI_DU_Q", +@@ -1612,7 +1612,7 @@ foreach Inst = ["XVSRLNI_D_Q", "XVSRANI_D_Q", "XVSRLRNI_D_Q", "XVSRARNI_D_Q", + def : Pat<(deriveLASXIntrinsic.ret + (v4i64 LASX256:$xd), (v4i64 LASX256:$xj), timm:$imm), + (!cast(Inst) LASX256:$xd, LASX256:$xj, +- (to_valide_timm timm:$imm))>; ++ (to_valid_timm timm:$imm))>; + + // vty: v32i8/v16i16/v8i32/v4i64 + // Pat<(Intrinsic vty:$xd, vty:$xj, vty:$xk), +@@ -1693,42 +1693,42 @@ foreach Inst = ["XVFLOGB_D", "XVFCLASS_D", "XVFSQRT_D", "XVFRECIP_D", "XVFRSQRT_ + (!cast(Inst) LASX256:$xj)>; + + def : Pat<(int_loongarch_lasx_xvpickve_w_f v8f32:$xj, timm:$imm), +- (XVPICKVE_W v8f32:$xj, (to_valide_timm timm:$imm))>; ++ (XVPICKVE_W v8f32:$xj, (to_valid_timm timm:$imm))>; + def : Pat<(int_loongarch_lasx_xvpickve_d_f v4f64:$xj, timm:$imm), +- (XVPICKVE_D v4f64:$xj, (to_valide_timm timm:$imm))>; ++ (XVPICKVE_D v4f64:$xj, (to_valid_timm timm:$imm))>; + + // load + def : Pat<(int_loongarch_lasx_xvld GPR:$rj, timm:$imm), +- (XVLD GPR:$rj, (to_valide_timm timm:$imm))>; ++ (XVLD GPR:$rj, (to_valid_timm timm:$imm))>; + def : Pat<(int_loongarch_lasx_xvldx GPR:$rj, GPR:$rk), + (XVLDX GPR:$rj, GPR:$rk)>; + + def : Pat<(int_loongarch_lasx_xvldrepl_b GPR:$rj, timm:$imm), +- (XVLDREPL_B GPR:$rj, (to_valide_timm timm:$imm))>; ++ (XVLDREPL_B GPR:$rj, (to_valid_timm timm:$imm))>; + def : Pat<(int_loongarch_lasx_xvldrepl_h GPR:$rj, timm:$imm), +- (XVLDREPL_H GPR:$rj, (to_valide_timm timm:$imm))>; ++ (XVLDREPL_H GPR:$rj, (to_valid_timm timm:$imm))>; + def : Pat<(int_loongarch_lasx_xvldrepl_w GPR:$rj, timm:$imm), +- (XVLDREPL_W GPR:$rj, (to_valide_timm timm:$imm))>; ++ (XVLDREPL_W GPR:$rj, (to_valid_timm timm:$imm))>; + def : Pat<(int_loongarch_lasx_xvldrepl_d GPR:$rj, timm:$imm), +- (XVLDREPL_D GPR:$rj, (to_valide_timm timm:$imm))>; ++ (XVLDREPL_D GPR:$rj, (to_valid_timm timm:$imm))>; + + // store + def : Pat<(int_loongarch_lasx_xvst LASX256:$xd, GPR:$rj, timm:$imm), +- (XVST LASX256:$xd, GPR:$rj, (to_valide_timm timm:$imm))>; ++ (XVST LASX256:$xd, GPR:$rj, (to_valid_timm timm:$imm))>; + def : Pat<(int_loongarch_lasx_xvstx LASX256:$xd, GPR:$rj, GPR:$rk), + (XVSTX LASX256:$xd, GPR:$rj, GPR:$rk)>; + + def : Pat<(int_loongarch_lasx_xvstelm_b v32i8:$xd, GPR:$rj, timm:$imm, timm:$idx), +- (XVSTELM_B v32i8:$xd, GPR:$rj, (to_valide_timm timm:$imm), +- (to_valide_timm timm:$idx))>; ++ (XVSTELM_B v32i8:$xd, GPR:$rj, (to_valid_timm timm:$imm), ++ (to_valid_timm timm:$idx))>; + def : Pat<(int_loongarch_lasx_xvstelm_h v16i16:$xd, GPR:$rj, timm:$imm, timm:$idx), +- (XVSTELM_H v16i16:$xd, GPR:$rj, (to_valide_timm timm:$imm), +- (to_valide_timm timm:$idx))>; ++ (XVSTELM_H v16i16:$xd, GPR:$rj, (to_valid_timm timm:$imm), ++ 
(to_valid_timm timm:$idx))>; + def : Pat<(int_loongarch_lasx_xvstelm_w v8i32:$xd, GPR:$rj, timm:$imm, timm:$idx), +- (XVSTELM_W v8i32:$xd, GPR:$rj, (to_valide_timm timm:$imm), +- (to_valide_timm timm:$idx))>; ++ (XVSTELM_W v8i32:$xd, GPR:$rj, (to_valid_timm timm:$imm), ++ (to_valid_timm timm:$idx))>; + def : Pat<(int_loongarch_lasx_xvstelm_d v4i64:$xd, GPR:$rj, timm:$imm, timm:$idx), +- (XVSTELM_D v4i64:$xd, GPR:$rj, (to_valide_timm timm:$imm), +- (to_valide_timm timm:$idx))>; ++ (XVSTELM_D v4i64:$xd, GPR:$rj, (to_valid_timm timm:$imm), ++ (to_valid_timm timm:$idx))>; + + } // Predicates = [HasExtLASX] +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index 13332be0bc38..e021adcecf4d 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -145,7 +145,7 @@ def lsxsplati32 : PatFrag<(ops node:$e0), + def lsxsplati64 : PatFrag<(ops node:$e0), + (v2i64 (build_vector node:$e0, node:$e0))>; + +-def to_valide_timm : SDNodeXForm(N); + return CurDAG->getTargetConstant(CN->getSExtValue(), SDLoc(N), Subtarget->getGRLenVT()); + }]>; +@@ -1639,10 +1639,10 @@ foreach Inst = ["VEXTH_Q_D", "VEXTH_QU_DU", "VMSKLTZ_D", + // Pat<(Intrinsic timm:$imm) + // (LAInst timm:$imm)>; + def : Pat<(int_loongarch_lsx_vldi timm:$imm), +- (VLDI (to_valide_timm timm:$imm))>; ++ (VLDI (to_valid_timm timm:$imm))>; + foreach Inst = ["VREPLI_B", "VREPLI_H", "VREPLI_W", "VREPLI_D"] in + def : Pat<(deriveLSXIntrinsic.ret timm:$imm), +- (!cast("Pseudo"#Inst) (to_valide_timm timm:$imm))>; ++ (!cast("Pseudo"#Inst) (to_valid_timm timm:$imm))>; + + // vty: v16i8/v8i16/v4i32/v2i64 + // Pat<(Intrinsic vty:$vj, timm:$imm) +@@ -1652,25 +1652,25 @@ foreach Inst = ["VSAT_B", "VSAT_BU", "VNORI_B", "VROTRI_B", "VSLLWIL_H_B", + "VSEQI_B", "VSLEI_B", "VSLEI_BU", "VSLTI_B", "VSLTI_BU", + "VREPLVEI_B", "VBSLL_V", "VBSRL_V", "VSHUF4I_B"] in + def : Pat<(deriveLSXIntrinsic.ret (v16i8 LSX128:$vj), timm:$imm), +- (!cast(Inst) LSX128:$vj, (to_valide_timm timm:$imm))>; ++ (!cast(Inst) LSX128:$vj, (to_valid_timm timm:$imm))>; + foreach Inst = ["VSAT_H", "VSAT_HU", "VROTRI_H", "VSLLWIL_W_H", + "VSLLWIL_WU_HU", "VSRLRI_H", "VSRARI_H", + "VSEQI_H", "VSLEI_H", "VSLEI_HU", "VSLTI_H", "VSLTI_HU", + "VREPLVEI_H", "VSHUF4I_H"] in + def : Pat<(deriveLSXIntrinsic.ret (v8i16 LSX128:$vj), timm:$imm), +- (!cast(Inst) LSX128:$vj, (to_valide_timm timm:$imm))>; ++ (!cast(Inst) LSX128:$vj, (to_valid_timm timm:$imm))>; + foreach Inst = ["VSAT_W", "VSAT_WU", "VROTRI_W", "VSLLWIL_D_W", + "VSLLWIL_DU_WU", "VSRLRI_W", "VSRARI_W", + "VSEQI_W", "VSLEI_W", "VSLEI_WU", "VSLTI_W", "VSLTI_WU", + "VREPLVEI_W", "VSHUF4I_W"] in + def : Pat<(deriveLSXIntrinsic.ret (v4i32 LSX128:$vj), timm:$imm), +- (!cast(Inst) LSX128:$vj, (to_valide_timm timm:$imm))>; ++ (!cast(Inst) LSX128:$vj, (to_valid_timm timm:$imm))>; + foreach Inst = ["VSAT_D", "VSAT_DU", "VROTRI_D", "VSRLRI_D", "VSRARI_D", + "VSEQI_D", "VSLEI_D", "VSLEI_DU", "VSLTI_D", "VSLTI_DU", + "VPICKVE2GR_D", "VPICKVE2GR_DU", + "VREPLVEI_D"] in + def : Pat<(deriveLSXIntrinsic.ret (v2i64 LSX128:$vj), timm:$imm), +- (!cast(Inst) LSX128:$vj, (to_valide_timm timm:$imm))>; ++ (!cast(Inst) LSX128:$vj, (to_valid_timm timm:$imm))>; + + // vty: v16i8/v8i16/v4i32/v2i64 + // Pat<(Intrinsic vty:$vd, vty:$vj, timm:$imm) +@@ -1682,7 +1682,7 @@ foreach Inst = ["VSRLNI_B_H", "VSRANI_B_H", "VSRLRNI_B_H", "VSRARNI_B_H", + def : Pat<(deriveLSXIntrinsic.ret + (v16i8 LSX128:$vd), (v16i8 LSX128:$vj), timm:$imm), + 
(!cast(Inst) LSX128:$vd, LSX128:$vj, +- (to_valide_timm timm:$imm))>; ++ (to_valid_timm timm:$imm))>; + foreach Inst = ["VSRLNI_H_W", "VSRANI_H_W", "VSRLRNI_H_W", "VSRARNI_H_W", + "VSSRLNI_H_W", "VSSRANI_H_W", "VSSRLNI_HU_W", "VSSRANI_HU_W", + "VSSRLRNI_H_W", "VSSRARNI_H_W", "VSSRLRNI_HU_W", "VSSRARNI_HU_W", +@@ -1690,7 +1690,7 @@ foreach Inst = ["VSRLNI_H_W", "VSRANI_H_W", "VSRLRNI_H_W", "VSRARNI_H_W", + def : Pat<(deriveLSXIntrinsic.ret + (v8i16 LSX128:$vd), (v8i16 LSX128:$vj), timm:$imm), + (!cast(Inst) LSX128:$vd, LSX128:$vj, +- (to_valide_timm timm:$imm))>; ++ (to_valid_timm timm:$imm))>; + foreach Inst = ["VSRLNI_W_D", "VSRANI_W_D", "VSRLRNI_W_D", "VSRARNI_W_D", + "VSSRLNI_W_D", "VSSRANI_W_D", "VSSRLNI_WU_D", "VSSRANI_WU_D", + "VSSRLRNI_W_D", "VSSRARNI_W_D", "VSSRLRNI_WU_D", "VSSRARNI_WU_D", +@@ -1698,7 +1698,7 @@ foreach Inst = ["VSRLNI_W_D", "VSRANI_W_D", "VSRLRNI_W_D", "VSRARNI_W_D", + def : Pat<(deriveLSXIntrinsic.ret + (v4i32 LSX128:$vd), (v4i32 LSX128:$vj), timm:$imm), + (!cast(Inst) LSX128:$vd, LSX128:$vj, +- (to_valide_timm timm:$imm))>; ++ (to_valid_timm timm:$imm))>; + foreach Inst = ["VSRLNI_D_Q", "VSRANI_D_Q", "VSRLRNI_D_Q", "VSRARNI_D_Q", + "VSSRLNI_D_Q", "VSSRANI_D_Q", "VSSRLNI_DU_Q", "VSSRANI_DU_Q", + "VSSRLRNI_D_Q", "VSSRARNI_D_Q", "VSSRLRNI_DU_Q", "VSSRARNI_DU_Q", +@@ -1706,7 +1706,7 @@ foreach Inst = ["VSRLNI_D_Q", "VSRANI_D_Q", "VSRLRNI_D_Q", "VSRARNI_D_Q", + def : Pat<(deriveLSXIntrinsic.ret + (v2i64 LSX128:$vd), (v2i64 LSX128:$vj), timm:$imm), + (!cast(Inst) LSX128:$vd, LSX128:$vj, +- (to_valide_timm timm:$imm))>; ++ (to_valid_timm timm:$imm))>; + + // vty: v16i8/v8i16/v4i32/v2i64 + // Pat<(Intrinsic vty:$vd, vty:$vj, vty:$vk), +@@ -1788,36 +1788,36 @@ foreach Inst = ["VFLOGB_D", "VFCLASS_D", "VFSQRT_D", "VFRECIP_D", "VFRSQRT_D", + + // load + def : Pat<(int_loongarch_lsx_vld GPR:$rj, timm:$imm), +- (VLD GPR:$rj, (to_valide_timm timm:$imm))>; ++ (VLD GPR:$rj, (to_valid_timm timm:$imm))>; + def : Pat<(int_loongarch_lsx_vldx GPR:$rj, GPR:$rk), + (VLDX GPR:$rj, GPR:$rk)>; + + def : Pat<(int_loongarch_lsx_vldrepl_b GPR:$rj, timm:$imm), +- (VLDREPL_B GPR:$rj, (to_valide_timm timm:$imm))>; ++ (VLDREPL_B GPR:$rj, (to_valid_timm timm:$imm))>; + def : Pat<(int_loongarch_lsx_vldrepl_h GPR:$rj, timm:$imm), +- (VLDREPL_H GPR:$rj, (to_valide_timm timm:$imm))>; ++ (VLDREPL_H GPR:$rj, (to_valid_timm timm:$imm))>; + def : Pat<(int_loongarch_lsx_vldrepl_w GPR:$rj, timm:$imm), +- (VLDREPL_W GPR:$rj, (to_valide_timm timm:$imm))>; ++ (VLDREPL_W GPR:$rj, (to_valid_timm timm:$imm))>; + def : Pat<(int_loongarch_lsx_vldrepl_d GPR:$rj, timm:$imm), +- (VLDREPL_D GPR:$rj, (to_valide_timm timm:$imm))>; ++ (VLDREPL_D GPR:$rj, (to_valid_timm timm:$imm))>; + + // store + def : Pat<(int_loongarch_lsx_vst LSX128:$vd, GPR:$rj, timm:$imm), +- (VST LSX128:$vd, GPR:$rj, (to_valide_timm timm:$imm))>; ++ (VST LSX128:$vd, GPR:$rj, (to_valid_timm timm:$imm))>; + def : Pat<(int_loongarch_lsx_vstx LSX128:$vd, GPR:$rj, GPR:$rk), + (VSTX LSX128:$vd, GPR:$rj, GPR:$rk)>; + + def : Pat<(int_loongarch_lsx_vstelm_b v16i8:$vd, GPR:$rj, timm:$imm, timm:$idx), +- (VSTELM_B v16i8:$vd, GPR:$rj, (to_valide_timm timm:$imm), +- (to_valide_timm timm:$idx))>; ++ (VSTELM_B v16i8:$vd, GPR:$rj, (to_valid_timm timm:$imm), ++ (to_valid_timm timm:$idx))>; + def : Pat<(int_loongarch_lsx_vstelm_h v8i16:$vd, GPR:$rj, timm:$imm, timm:$idx), +- (VSTELM_H v8i16:$vd, GPR:$rj, (to_valide_timm timm:$imm), +- (to_valide_timm timm:$idx))>; ++ (VSTELM_H v8i16:$vd, GPR:$rj, (to_valid_timm timm:$imm), ++ (to_valid_timm timm:$idx))>; + def : 
Pat<(int_loongarch_lsx_vstelm_w v4i32:$vd, GPR:$rj, timm:$imm, timm:$idx), +- (VSTELM_W v4i32:$vd, GPR:$rj, (to_valide_timm timm:$imm), +- (to_valide_timm timm:$idx))>; ++ (VSTELM_W v4i32:$vd, GPR:$rj, (to_valid_timm timm:$imm), ++ (to_valid_timm timm:$idx))>; + def : Pat<(int_loongarch_lsx_vstelm_d v2i64:$vd, GPR:$rj, timm:$imm, timm:$idx), +- (VSTELM_D v2i64:$vd, GPR:$rj, (to_valide_timm timm:$imm), +- (to_valide_timm timm:$idx))>; ++ (VSTELM_D v2i64:$vd, GPR:$rj, (to_valid_timm timm:$imm), ++ (to_valid_timm timm:$idx))>; + + } // Predicates = [HasExtLSX] +-- +2.20.1 + diff --git a/0011-lld-ELF-Add-a-corner-testcase-for-elf-getLoongArchPa.patch b/0011-lld-ELF-Add-a-corner-testcase-for-elf-getLoongArchPa.patch new file mode 100644 index 0000000..8045484 --- /dev/null +++ b/0011-lld-ELF-Add-a-corner-testcase-for-elf-getLoongArchPa.patch @@ -0,0 +1,44 @@ +From 0ad77f6b993bb60054081a22694fb0cf236b4588 Mon Sep 17 00:00:00 2001 +From: Weining Lu +Date: Fri, 10 Nov 2023 13:37:55 +0800 +Subject: [PATCH 11/23] [lld][ELF] Add a corner testcase for + elf::getLoongArchPageDelta + +If `page(dest) - page(pc)` is 0xfffffffffff000, i.e. page(pc) is next +to page(dest), and lo12(dest) > 0x7ff, correct %pc64_lo12 and %pc64_hi12 +should be both -1 (which can be checked with binutils) but they are both +0 on lld. This patch adds such a test showing lld's incorrect behaviour +and following patch will fix this issue. + +(cherry picked from commit e752b58e0d26fc08bca6b2a4e56b05af7f8d8d66) +--- + lld/test/ELF/loongarch-pc-aligned.s | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/lld/test/ELF/loongarch-pc-aligned.s b/lld/test/ELF/loongarch-pc-aligned.s +index 9df3492d1877..f6ac56e5261d 100644 +--- a/lld/test/ELF/loongarch-pc-aligned.s ++++ b/lld/test/ELF/loongarch-pc-aligned.s +@@ -260,6 +260,19 @@ + # EXTREME15-NEXT: lu32i.d $t0, -349526 + # EXTREME15-NEXT: lu52i.d $t0, $t0, -1093 + ++## FIXME: Correct %pc64_lo20 should be 0xfffff (-1) and %pc64_hi12 should be 0xfff (-1), but current values are: ++## page delta = 0x0000000000000000, page offset = 0x888 ++## %pc_lo12 = 0x888 = -1912 ++## %pc_hi20 = 0x00000 = 0 ++## %pc64_lo20 = 0x00000 = 0 ++## %pc64_hi12 = 0x00000 = 0 ++# RUN: ld.lld %t/extreme.o --section-start=.rodata=0x0000000012344888 --section-start=.text=0x0000000012345678 -o %t/extreme16 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme16 | FileCheck %s --check-prefix=EXTREME16 ++# EXTREME16: addi.d $t0, $zero, -1912 ++# EXTREME16-NEXT: pcalau12i $t1, 0 ++# EXTREME16-NEXT: lu32i.d $t0, 0 ++# EXTREME16-NEXT: lu52i.d $t0, $t0, 0 ++ + #--- a.s + .rodata + x: +-- +2.20.1 + diff --git a/0008-Backport-test-Update-dwarf-loongarch-relocs.ll.patch b/0011-test-Update-dwarf-loongarch-relocs.ll.patch similarity index 98% rename from 0008-Backport-test-Update-dwarf-loongarch-relocs.ll.patch rename to 0011-test-Update-dwarf-loongarch-relocs.ll.patch index 4ed67f0..b78c6f9 100644 --- a/0008-Backport-test-Update-dwarf-loongarch-relocs.ll.patch +++ b/0011-test-Update-dwarf-loongarch-relocs.ll.patch @@ -9,7 +9,6 @@ http://45.33.8.238/linux/128902/step_12.txt (cherry picked from commit baba7e4175b6ca21e83b1cf8229f29dbba02e979) (cherry picked from commit c9e73cdd9a17f15ede120ea57657553f9e105eab) -Change-Id: I00aa1414f556f0ba5ff6bf6a879a6fc1fcfa49e0 --- .../LoongArch/dwarf-loongarch-relocs.ll | 37 ++++++++++++------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/0012-Backport-LoongArch-improve-the-support-for-compiler-rt-and-bugfix.patch 
b/0012-Backport-LoongArch-improve-the-support-for-compiler-rt-and-bugfix.patch deleted file mode 100644 index e40be81..0000000 --- a/0012-Backport-LoongArch-improve-the-support-for-compiler-rt-and-bugfix.patch +++ /dev/null @@ -1,2474 +0,0 @@ -From 0bce68310dc0ff6a09ec2cf5c3ae32400c631324 Mon Sep 17 00:00:00 2001 -From: zhanglimin -Date: Tue, 12 Sep 2023 09:51:16 +0800 -Subject: [PATCH 01/14] [sanitizer][msan] VarArgHelper for loongarch64 - -This patch adds support for variadic argument for loongarch64, -which is based on MIPS64. And `check-msan` all pass. - -Reviewed By: vitalybuka - -Differential Revision: https://reviews.llvm.org/D158587 - -(cherry picked from commit ec42c78cc43ac1e8364e5a0941aa5fc91b813dd3) ---- - .../Instrumentation/MemorySanitizer.cpp | 7 ++ - .../LoongArch/vararg-loongarch64.ll | 78 +++++++++++++++++++ - 2 files changed, 85 insertions(+) - create mode 100644 llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll - -diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp -index 83d90049abc3..362fd6e4151f 100644 ---- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp -+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp -@@ -4945,6 +4945,7 @@ struct VarArgAMD64Helper : public VarArgHelper { - }; - - /// MIPS64-specific implementation of VarArgHelper. -+/// NOTE: This is also used for LoongArch64. - struct VarArgMIPS64Helper : public VarArgHelper { - Function &F; - MemorySanitizer &MS; -@@ -5836,6 +5837,10 @@ struct VarArgSystemZHelper : public VarArgHelper { - } - }; - -+// Loongarch64 is not a MIPS, but the current vargs calling convention matches -+// the MIPS. -+using VarArgLoongArch64Helper = VarArgMIPS64Helper; -+ - /// A no-op implementation of VarArgHelper. - struct VarArgNoOpHelper : public VarArgHelper { - VarArgNoOpHelper(Function &F, MemorySanitizer &MS, -@@ -5868,6 +5873,8 @@ static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan, - return new VarArgPowerPC64Helper(Func, Msan, Visitor); - else if (TargetTriple.getArch() == Triple::systemz) - return new VarArgSystemZHelper(Func, Msan, Visitor); -+ else if (TargetTriple.isLoongArch64()) -+ return new VarArgLoongArch64Helper(Func, Msan, Visitor); - else - return new VarArgNoOpHelper(Func, Msan, Visitor); - } -diff --git a/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll b/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll -new file mode 100644 -index 000000000000..8a4ab59588ad ---- /dev/null -+++ b/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll -@@ -0,0 +1,78 @@ -+; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck %s -+ -+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" -+target triple = "loongarch64-unknown-linux-gnu" -+ -+;; First, check allocation of the save area. -+declare void @llvm.lifetime.start.p0(i64, ptr nocapture) #1 -+declare void @llvm.va_start(ptr) #2 -+declare void @llvm.va_end(ptr) #2 -+declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #1 -+define i32 @foo(i32 %guard, ...) 
{ -+; CHECK-LABEL: @foo -+; CHECK: [[TMP1:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls -+; CHECK: [[TMP2:%.*]] = add i64 0, [[TMP1]] -+; CHECK: [[TMP3:%.*]] = alloca {{.*}} [[TMP2]] -+; CHECK: call void @llvm.memset.p0.i64(ptr align 8 [[TMP3]], i8 0, i64 [[TMP2]], i1 false) -+; CHECK: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP2]], i64 800) -+; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP3]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP4]], i1 false) -+; -+ %vl = alloca ptr, align 8 -+ call void @llvm.lifetime.start.p0(i64 32, ptr %vl) -+ call void @llvm.va_start(ptr %vl) -+ call void @llvm.va_end(ptr %vl) -+ call void @llvm.lifetime.end.p0(i64 32, ptr %vl) -+ ret i32 0 -+} -+ -+;; Save the incoming shadow value from the arguments in the __msan_va_arg_tls -+;; array. -+define i32 @bar() { -+; CHECK-LABEL: @bar -+; CHECK: store i32 0, ptr @__msan_va_arg_tls, align 8 -+; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 -+; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 -+; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls -+; -+ %1 = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00) -+ ret i32 %1 -+} -+ -+;; Check multiple fixed arguments. -+declare i32 @foo2(i32 %g1, i32 %g2, ...) -+define i32 @bar2() { -+; CHECK-LABEL: @bar2 -+; CHECK: store i64 0, ptr @__msan_va_arg_tls, align 8 -+; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 -+; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls -+; -+ %1 = call i32 (i32, i32, ...) @foo2(i32 0, i32 1, i64 2, double 3.000000e+00) -+ ret i32 %1 -+} -+ -+;; Test that MSan doesn't generate code overflowing __msan_va_arg_tls when too many arguments are -+;; passed to a variadic function. -+declare i64 @sum(i64 %n, ...) -+define dso_local i64 @many_args() { -+;; If the size of __msan_va_arg_tls changes the second argument of `add` must also be changed. -+; CHECK-LABEL: @many_args -+; CHECK: i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 792) -+; CHECK-NOT: i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 800) -+; -+entry: -+ %ret = call i64 (i64, ...) @sum(i64 120, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1 -+ ) -+ ret i64 %ret -+} --- -2.20.1 - - -From f1265a12fa947b79967552ab520f904486c76353 Mon Sep 17 00:00:00 2001 -From: Ami-zhang <96056515+Ami-zhang@users.noreply.github.com> -Date: Thu, 28 Sep 2023 15:26:18 +0800 -Subject: [PATCH 02/14] [LowerTypeTests] Add loongarch64 to CFI jumptables - (#67312) - -This patch implements jump tables for loongarch64. 
- -(cherry picked from commit 0e8a8c85f8765c086c573f36e60c895920381e18) ---- - llvm/lib/Transforms/IPO/LowerTypeTests.cpp | 9 ++++++++- - llvm/test/Transforms/LowerTypeTests/function-weak.ll | 2 ++ - llvm/test/Transforms/LowerTypeTests/function.ll | 9 +++++++++ - 3 files changed, 19 insertions(+), 1 deletion(-) - -diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp -index 9b4b3efd7283..a89d57d12615 100644 ---- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp -+++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp -@@ -1196,6 +1196,7 @@ static const unsigned kARMJumpTableEntrySize = 4; - static const unsigned kARMBTIJumpTableEntrySize = 8; - static const unsigned kARMv6MJumpTableEntrySize = 16; - static const unsigned kRISCVJumpTableEntrySize = 8; -+static const unsigned kLOONGARCH64JumpTableEntrySize = 8; - - unsigned LowerTypeTestsModule::getJumpTableEntrySize() { - switch (JumpTableArch) { -@@ -1222,6 +1223,8 @@ unsigned LowerTypeTestsModule::getJumpTableEntrySize() { - case Triple::riscv32: - case Triple::riscv64: - return kRISCVJumpTableEntrySize; -+ case Triple::loongarch64: -+ return kLOONGARCH64JumpTableEntrySize; - default: - report_fatal_error("Unsupported architecture for jump tables"); - } -@@ -1286,6 +1289,9 @@ void LowerTypeTestsModule::createJumpTableEntry( - } else if (JumpTableArch == Triple::riscv32 || - JumpTableArch == Triple::riscv64) { - AsmOS << "tail $" << ArgIndex << "@plt\n"; -+ } else if (JumpTableArch == Triple::loongarch64) { -+ AsmOS << "pcalau12i $$t0, %pc_hi20($" << ArgIndex << ")\n" -+ << "jirl $$r0, $$t0, %pc_lo12($" << ArgIndex << ")\n"; - } else { - report_fatal_error("Unsupported architecture for jump tables"); - } -@@ -1304,7 +1310,8 @@ void LowerTypeTestsModule::buildBitSetsFromFunctions( - ArrayRef TypeIds, ArrayRef Functions) { - if (Arch == Triple::x86 || Arch == Triple::x86_64 || Arch == Triple::arm || - Arch == Triple::thumb || Arch == Triple::aarch64 || -- Arch == Triple::riscv32 || Arch == Triple::riscv64) -+ Arch == Triple::riscv32 || Arch == Triple::riscv64 || -+ Arch == Triple::loongarch64) - buildBitSetsFromFunctionsNative(TypeIds, Functions); - else if (Arch == Triple::wasm32 || Arch == Triple::wasm64) - buildBitSetsFromFunctionsWASM(TypeIds, Functions); -diff --git a/llvm/test/Transforms/LowerTypeTests/function-weak.ll b/llvm/test/Transforms/LowerTypeTests/function-weak.ll -index ff69abacc8e9..c765937f1991 100644 ---- a/llvm/test/Transforms/LowerTypeTests/function-weak.ll -+++ b/llvm/test/Transforms/LowerTypeTests/function-weak.ll -@@ -4,6 +4,7 @@ - ; RUN: opt -S -passes=lowertypetests -mtriple=aarch64-unknown-linux-gnu %s | FileCheck --check-prefixes=CHECK,ARM %s - ; RUN: opt -S -passes=lowertypetests -mtriple=riscv32-unknown-linux-gnu %s | FileCheck --check-prefixes=CHECK,RISCV %s - ; RUN: opt -S -passes=lowertypetests -mtriple=riscv64-unknown-linux-gnu %s | FileCheck --check-prefixes=CHECK,RISCV %s -+; RUN: opt -S -passes=lowertypetests -mtriple=loongarch64-unknown-linux-gnu %s | FileCheck --check-prefixes=CHECK,LOONGARCH64 %s - - target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - target triple = "x86_64-unknown-linux-gnu" -@@ -116,6 +117,7 @@ define i1 @foo(ptr %p) { - ; X86: define private void @[[JT]]() #{{.*}} align 8 { - ; ARM: define private void @[[JT]]() #{{.*}} align 4 { - ; RISCV: define private void @[[JT]]() #{{.*}} align 8 { -+; LOONGARCH64: define private void @[[JT]]() #{{.*}} align 8 { - - ; CHECK: define internal void @__cfi_global_var_init() section ".text.startup" { - 
; CHECK-NEXT: entry: -diff --git a/llvm/test/Transforms/LowerTypeTests/function.ll b/llvm/test/Transforms/LowerTypeTests/function.ll -index 968c9d434eb2..802b88d92977 100644 ---- a/llvm/test/Transforms/LowerTypeTests/function.ll -+++ b/llvm/test/Transforms/LowerTypeTests/function.ll -@@ -5,6 +5,7 @@ - ; RUN: opt -S -passes=lowertypetests -mtriple=riscv32-unknown-linux-gnu %s | FileCheck --check-prefixes=RISCV,NATIVE %s - ; RUN: opt -S -passes=lowertypetests -mtriple=riscv64-unknown-linux-gnu %s | FileCheck --check-prefixes=RISCV,NATIVE %s - ; RUN: opt -S -passes=lowertypetests -mtriple=wasm32-unknown-unknown %s | FileCheck --check-prefix=WASM32 %s -+; RUN: opt -S -passes=lowertypetests -mtriple=loongarch64-unknown-linux-gnu %s | FileCheck --check-prefixes=LOONGARCH64,NATIVE %s - - ; The right format for Arm jump tables depends on the selected - ; subtarget, so we can't get these tests right without the Arm target -@@ -34,6 +35,7 @@ target datalayout = "e-p:64:64" - ; THUMB: @g = internal alias void (), getelementptr inbounds ([2 x [4 x i8]], ptr @[[JT]], i64 0, i64 1) - ; THUMBV6M: @g = internal alias void (), getelementptr inbounds ([2 x [16 x i8]], ptr @[[JT]], i64 0, i64 1) - ; RISCV: @g = internal alias void (), getelementptr inbounds ([2 x [8 x i8]], ptr @[[JT]], i64 0, i64 1) -+; LOONGARCH64: @g = internal alias void (), getelementptr inbounds ([2 x [8 x i8]], ptr @[[JT]], i64 0, i64 1) - - ; NATIVE: define hidden void @f.cfi() - ; WASM32: define void @f() !type !{{[0-9]+}} !wasm.index ![[I0:[0-9]+]] -@@ -65,6 +67,7 @@ define i1 @foo(ptr %p) { - ; THUMB: define private void @[[JT]]() #[[ATTR:.*]] align 4 { - ; THUMBV6M: define private void @[[JT]]() #[[ATTR:.*]] align 16 { - ; RISCV: define private void @[[JT]]() #[[ATTR:.*]] align 8 { -+; LOONGARCH64: define private void @[[JT]]() #[[ATTR:.*]] align 8 { - - ; X86: jmp ${0:c}@plt - ; X86-SAME: int3 -@@ -99,6 +102,11 @@ define i1 @foo(ptr %p) { - ; RISCV: tail $0@plt - ; RISCV-SAME: tail $1@plt - -+; LOONGARCH64: pcalau12i $$t0, %pc_hi20($0) -+; LOONGARCH64-SAME: jirl $$r0, $$t0, %pc_lo12($0) -+; LOONGARCH64-SAME: pcalau12i $$t0, %pc_hi20($1) -+; LOONGARCH64-SAME: jirl $$r0, $$t0, %pc_lo12($1) -+ - ; NATIVE-SAME: "s,s"(ptr @f.cfi, ptr @g.cfi) - - ; X86-LINUX: attributes #[[ATTR]] = { naked nocf_check nounwind } -@@ -107,6 +115,7 @@ define i1 @foo(ptr %p) { - ; THUMB: attributes #[[ATTR]] = { naked nounwind "target-cpu"="cortex-a8" "target-features"="+thumb-mode" } - ; THUMBV6M: attributes #[[ATTR]] = { naked nounwind "target-features"="+thumb-mode" } - ; RISCV: attributes #[[ATTR]] = { naked nounwind "target-features"="-c,-relax" } -+; LOONGARCH64: attributes #[[ATTR]] = { naked nounwind } - - ; WASM32: ![[I0]] = !{i64 1} - ; WASM32: ![[I1]] = !{i64 2} --- -2.20.1 - - -From 6f3143e1ad0bb759b7519af81994ed3c71dcf52b Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Fri, 20 Oct 2023 10:44:55 +0800 -Subject: [PATCH 03/14] [LoongArch] Fix td pattern for CACOP LDPTE and LDDIR - -The immediate argument should be a target constant (`timm`). 
- -(cherry picked from commit 47826b3f148996767ebd2c67ee41c329cb364fef) ---- - llvm/lib/Target/LoongArch/LoongArchInstrInfo.td | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -index b2c4bb812ba5..166379d7d592 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -@@ -1857,9 +1857,9 @@ defm : PseudoBinPat<"atomic_load_xor_32", PseudoAtomicLoadXor32>; - /// Intrinsics - - def : Pat<(int_loongarch_cacop_d timm:$op, i64:$rj, timm:$imm12), -- (CACOP uimm5:$op, GPR:$rj, simm12:$imm12)>; -+ (CACOP timm:$op, GPR:$rj, timm:$imm12)>; - def : Pat<(int_loongarch_cacop_w i32:$op, i32:$rj, i32:$imm12), -- (CACOP uimm5:$op, GPR:$rj, simm12:$imm12)>; -+ (CACOP timm:$op, GPR:$rj, timm:$imm12)>; - def : Pat<(loongarch_dbar uimm15:$imm15), (DBAR uimm15:$imm15)>; - def : Pat<(loongarch_ibar uimm15:$imm15), (IBAR uimm15:$imm15)>; - def : Pat<(loongarch_break uimm15:$imm15), (BREAK uimm15:$imm15)>; -@@ -2023,9 +2023,9 @@ def : Pat<(int_loongarch_asrtle_d GPR:$rj, GPR:$rk), - def : Pat<(int_loongarch_asrtgt_d GPR:$rj, GPR:$rk), - (ASRTGT_D GPR:$rj, GPR:$rk)>; - def : Pat<(int_loongarch_lddir_d GPR:$rj, timm:$imm8), -- (LDDIR GPR:$rj, uimm8:$imm8)>; -+ (LDDIR GPR:$rj, timm:$imm8)>; - def : Pat<(int_loongarch_ldpte_d GPR:$rj, timm:$imm8), -- (LDPTE GPR:$rj, uimm8:$imm8)>; -+ (LDPTE GPR:$rj, timm:$imm8)>; - } // Predicates = [IsLA64] - - //===----------------------------------------------------------------------===// --- -2.20.1 - - -From d90b85e94180543fd1789f9e26d7931f2329069b Mon Sep 17 00:00:00 2001 -From: ZhaoQi -Date: Fri, 10 Nov 2023 15:54:33 +0800 -Subject: [PATCH 04/14] [LoongArch][MC] Refine MCInstrAnalysis based on - registers used (#71276) - -MCInstrAnalysis can return properties of instructions (e.g., isCall(), -isBranch(),...) based on the informations that MCInstrDesc can get from -*InstrInfo*.td files. These infos are based on opcodes only, but JIRL -can have different properties based on different registers used. - -So this patch refines several MCInstrAnalysis methods: isTerminator, -isCall,isReturn,isBranch,isUnconditionalBranch and isIndirectBranch. - -This patch also allows BOLT which will be supported on LoongArch later -to get right instruction infos. 
- -(cherry picked from commit f7d784709673ca185f6fb0633fd53c72e81f2ae1) ---- - .../MCTargetDesc/LoongArchMCTargetDesc.cpp | 76 +++++++++++++ - .../unittests/Target/LoongArch/CMakeLists.txt | 1 + - .../Target/LoongArch/MCInstrAnalysisTest.cpp | 107 ++++++++++++++++++ - 3 files changed, 184 insertions(+) - create mode 100644 llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp - -diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp -index 942e667bc261..d580c3457fec 100644 ---- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp -+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp -@@ -104,6 +104,82 @@ public: - - return false; - } -+ -+ bool isTerminator(const MCInst &Inst) const override { -+ if (MCInstrAnalysis::isTerminator(Inst)) -+ return true; -+ -+ switch (Inst.getOpcode()) { -+ default: -+ return false; -+ case LoongArch::JIRL: -+ return Inst.getOperand(0).getReg() == LoongArch::R0; -+ } -+ } -+ -+ bool isCall(const MCInst &Inst) const override { -+ if (MCInstrAnalysis::isCall(Inst)) -+ return true; -+ -+ switch (Inst.getOpcode()) { -+ default: -+ return false; -+ case LoongArch::JIRL: -+ return Inst.getOperand(0).getReg() != LoongArch::R0; -+ } -+ } -+ -+ bool isReturn(const MCInst &Inst) const override { -+ if (MCInstrAnalysis::isReturn(Inst)) -+ return true; -+ -+ switch (Inst.getOpcode()) { -+ default: -+ return false; -+ case LoongArch::JIRL: -+ return Inst.getOperand(0).getReg() == LoongArch::R0 && -+ Inst.getOperand(1).getReg() == LoongArch::R1; -+ } -+ } -+ -+ bool isBranch(const MCInst &Inst) const override { -+ if (MCInstrAnalysis::isBranch(Inst)) -+ return true; -+ -+ switch (Inst.getOpcode()) { -+ default: -+ return false; -+ case LoongArch::JIRL: -+ return Inst.getOperand(0).getReg() == LoongArch::R0 && -+ Inst.getOperand(1).getReg() != LoongArch::R1; -+ } -+ } -+ -+ bool isUnconditionalBranch(const MCInst &Inst) const override { -+ if (MCInstrAnalysis::isUnconditionalBranch(Inst)) -+ return true; -+ -+ switch (Inst.getOpcode()) { -+ default: -+ return false; -+ case LoongArch::JIRL: -+ return Inst.getOperand(0).getReg() == LoongArch::R0 && -+ Inst.getOperand(1).getReg() != LoongArch::R1; -+ } -+ } -+ -+ bool isIndirectBranch(const MCInst &Inst) const override { -+ if (MCInstrAnalysis::isIndirectBranch(Inst)) -+ return true; -+ -+ switch (Inst.getOpcode()) { -+ default: -+ return false; -+ case LoongArch::JIRL: -+ return Inst.getOperand(0).getReg() == LoongArch::R0 && -+ Inst.getOperand(1).getReg() != LoongArch::R1; -+ } -+ } - }; - - } // end namespace -diff --git a/llvm/unittests/Target/LoongArch/CMakeLists.txt b/llvm/unittests/Target/LoongArch/CMakeLists.txt -index fef4f8e15461..e6f8ec073721 100644 ---- a/llvm/unittests/Target/LoongArch/CMakeLists.txt -+++ b/llvm/unittests/Target/LoongArch/CMakeLists.txt -@@ -20,6 +20,7 @@ set(LLVM_LINK_COMPONENTS - - add_llvm_target_unittest(LoongArchTests - InstSizes.cpp -+ MCInstrAnalysisTest.cpp - ) - - set_property(TARGET LoongArchTests PROPERTY FOLDER "Tests/UnitTests/TargetTests") -diff --git a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp -new file mode 100644 -index 000000000000..6a208d274a0d ---- /dev/null -+++ b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp -@@ -0,0 +1,107 @@ -+//===- MCInstrAnalysisTest.cpp - LoongArchMCInstrAnalysis unit tests ------===// -+// -+// Part of the LLVM Project, under the Apache License 
v2.0 with LLVM Exceptions. -+// See https://llvm.org/LICENSE.txt for license information. -+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -+// -+//===----------------------------------------------------------------------===// -+ -+#include "llvm/MC/MCInstrAnalysis.h" -+#include "MCTargetDesc/LoongArchMCTargetDesc.h" -+#include "llvm/MC/MCInstBuilder.h" -+#include "llvm/MC/TargetRegistry.h" -+#include "llvm/Support/TargetSelect.h" -+ -+#include "gtest/gtest.h" -+ -+#include -+ -+using namespace llvm; -+ -+namespace { -+ -+class InstrAnalysisTest : public testing::TestWithParam { -+protected: -+ std::unique_ptr Info; -+ std::unique_ptr Analysis; -+ -+ static void SetUpTestSuite() { -+ LLVMInitializeLoongArchTargetInfo(); -+ LLVMInitializeLoongArchTarget(); -+ LLVMInitializeLoongArchTargetMC(); -+ } -+ -+ InstrAnalysisTest() { -+ std::string Error; -+ const Target *TheTarget = -+ TargetRegistry::lookupTarget(Triple::normalize(GetParam()), Error); -+ Info = std::unique_ptr(TheTarget->createMCInstrInfo()); -+ Analysis = std::unique_ptr( -+ TheTarget->createMCInstrAnalysis(Info.get())); -+ } -+}; -+ -+} // namespace -+ -+static MCInst beq() { -+ return MCInstBuilder(LoongArch::BEQ) -+ .addReg(LoongArch::R0) -+ .addReg(LoongArch::R1) -+ .addImm(32); -+} -+ -+static MCInst bl() { return MCInstBuilder(LoongArch::BL).addImm(32); } -+ -+static MCInst jirl(unsigned RD, unsigned RJ = LoongArch::R10) { -+ return MCInstBuilder(LoongArch::JIRL).addReg(RD).addReg(RJ).addImm(16); -+} -+ -+TEST_P(InstrAnalysisTest, IsTerminator) { -+ EXPECT_TRUE(Analysis->isTerminator(beq())); -+ EXPECT_FALSE(Analysis->isTerminator(bl())); -+ EXPECT_TRUE(Analysis->isTerminator(jirl(LoongArch::R0))); -+ EXPECT_FALSE(Analysis->isTerminator(jirl(LoongArch::R5))); -+} -+ -+TEST_P(InstrAnalysisTest, IsCall) { -+ EXPECT_FALSE(Analysis->isCall(beq())); -+ EXPECT_TRUE(Analysis->isCall(bl())); -+ EXPECT_TRUE(Analysis->isCall(jirl(LoongArch::R1))); -+ EXPECT_FALSE(Analysis->isCall(jirl(LoongArch::R0))); -+} -+ -+TEST_P(InstrAnalysisTest, IsReturn) { -+ EXPECT_FALSE(Analysis->isReturn(beq())); -+ EXPECT_FALSE(Analysis->isReturn(bl())); -+ EXPECT_TRUE(Analysis->isReturn(jirl(LoongArch::R0, LoongArch::R1))); -+ EXPECT_FALSE(Analysis->isReturn(jirl(LoongArch::R0))); -+ EXPECT_FALSE(Analysis->isReturn(jirl(LoongArch::R1))); -+} -+ -+TEST_P(InstrAnalysisTest, IsBranch) { -+ EXPECT_TRUE(Analysis->isBranch(beq())); -+ EXPECT_FALSE(Analysis->isBranch(bl())); -+ EXPECT_TRUE(Analysis->isBranch(jirl(LoongArch::R0))); -+ EXPECT_FALSE(Analysis->isBranch(jirl(LoongArch::R1))); -+ EXPECT_FALSE(Analysis->isBranch(jirl(LoongArch::R0, LoongArch::R1))); -+} -+ -+TEST_P(InstrAnalysisTest, IsUnconditionalBranch) { -+ EXPECT_FALSE(Analysis->isUnconditionalBranch(beq())); -+ EXPECT_FALSE(Analysis->isUnconditionalBranch(bl())); -+ EXPECT_TRUE(Analysis->isUnconditionalBranch(jirl(LoongArch::R0))); -+ EXPECT_FALSE(Analysis->isUnconditionalBranch(jirl(LoongArch::R1))); -+ EXPECT_FALSE( -+ Analysis->isUnconditionalBranch(jirl(LoongArch::R0, LoongArch::R1))); -+} -+ -+TEST_P(InstrAnalysisTest, IsIndirectBranch) { -+ EXPECT_FALSE(Analysis->isIndirectBranch(beq())); -+ EXPECT_FALSE(Analysis->isIndirectBranch(bl())); -+ EXPECT_TRUE(Analysis->isIndirectBranch(jirl(LoongArch::R0))); -+ EXPECT_FALSE(Analysis->isIndirectBranch(jirl(LoongArch::R1))); -+ EXPECT_FALSE(Analysis->isIndirectBranch(jirl(LoongArch::R0, LoongArch::R1))); -+} -+ -+INSTANTIATE_TEST_SUITE_P(LA32And64, InstrAnalysisTest, -+ testing::Values("loongarch32", "loongarch64")); --- -2.20.1 - - 
-From 4d3ba0892d66b21f6a8a72f1d787e42a64be8867 Mon Sep 17 00:00:00 2001 -From: ZhaoQi -Date: Wed, 15 Nov 2023 11:12:30 +0800 -Subject: [PATCH 05/14] [LoongArch][NFC] Pre-commit MCInstrAnalysis tests for - instruction 'b' (#71903) - -The tests for 'b' which commented with FIXME are incorrect, the -following patch will fix it. - -(cherry picked from commit f6c4bb07eaa94bcd5d02ba7a46850225b6ed50d4) ---- - .../Target/LoongArch/MCInstrAnalysisTest.cpp | 18 ++++++++++++++++++ - 1 file changed, 18 insertions(+) - -diff --git a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp -index 6a208d274a0d..6e1919fc2261 100644 ---- a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp -+++ b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp -@@ -50,6 +50,8 @@ static MCInst beq() { - .addImm(32); - } - -+static MCInst b() { return MCInstBuilder(LoongArch::B).addImm(32); } -+ - static MCInst bl() { return MCInstBuilder(LoongArch::BL).addImm(32); } - - static MCInst jirl(unsigned RD, unsigned RJ = LoongArch::R10) { -@@ -58,6 +60,7 @@ static MCInst jirl(unsigned RD, unsigned RJ = LoongArch::R10) { - - TEST_P(InstrAnalysisTest, IsTerminator) { - EXPECT_TRUE(Analysis->isTerminator(beq())); -+ EXPECT_TRUE(Analysis->isTerminator(b())); - EXPECT_FALSE(Analysis->isTerminator(bl())); - EXPECT_TRUE(Analysis->isTerminator(jirl(LoongArch::R0))); - EXPECT_FALSE(Analysis->isTerminator(jirl(LoongArch::R5))); -@@ -65,6 +68,7 @@ TEST_P(InstrAnalysisTest, IsTerminator) { - - TEST_P(InstrAnalysisTest, IsCall) { - EXPECT_FALSE(Analysis->isCall(beq())); -+ EXPECT_FALSE(Analysis->isCall(b())); - EXPECT_TRUE(Analysis->isCall(bl())); - EXPECT_TRUE(Analysis->isCall(jirl(LoongArch::R1))); - EXPECT_FALSE(Analysis->isCall(jirl(LoongArch::R0))); -@@ -72,6 +76,7 @@ TEST_P(InstrAnalysisTest, IsCall) { - - TEST_P(InstrAnalysisTest, IsReturn) { - EXPECT_FALSE(Analysis->isReturn(beq())); -+ EXPECT_FALSE(Analysis->isReturn(b())); - EXPECT_FALSE(Analysis->isReturn(bl())); - EXPECT_TRUE(Analysis->isReturn(jirl(LoongArch::R0, LoongArch::R1))); - EXPECT_FALSE(Analysis->isReturn(jirl(LoongArch::R0))); -@@ -80,14 +85,26 @@ TEST_P(InstrAnalysisTest, IsReturn) { - - TEST_P(InstrAnalysisTest, IsBranch) { - EXPECT_TRUE(Analysis->isBranch(beq())); -+ EXPECT_TRUE(Analysis->isBranch(b())); - EXPECT_FALSE(Analysis->isBranch(bl())); - EXPECT_TRUE(Analysis->isBranch(jirl(LoongArch::R0))); - EXPECT_FALSE(Analysis->isBranch(jirl(LoongArch::R1))); - EXPECT_FALSE(Analysis->isBranch(jirl(LoongArch::R0, LoongArch::R1))); - } - -+TEST_P(InstrAnalysisTest, IsConditionalBranch) { -+ EXPECT_TRUE(Analysis->isConditionalBranch(beq())); -+ // FIXME: Instr 'b' is not a ConditionalBranch, so the analysis here is -+ // wrong. The following patch will fix it. -+ EXPECT_TRUE(Analysis->isConditionalBranch(b())); -+ EXPECT_FALSE(Analysis->isConditionalBranch(bl())); -+} -+ - TEST_P(InstrAnalysisTest, IsUnconditionalBranch) { - EXPECT_FALSE(Analysis->isUnconditionalBranch(beq())); -+ // FIXME: Instr 'b' is an UnconditionalBranch, so the analysis here is -+ // wrong. The following patch will fix it. 
-+ EXPECT_FALSE(Analysis->isUnconditionalBranch(b())); - EXPECT_FALSE(Analysis->isUnconditionalBranch(bl())); - EXPECT_TRUE(Analysis->isUnconditionalBranch(jirl(LoongArch::R0))); - EXPECT_FALSE(Analysis->isUnconditionalBranch(jirl(LoongArch::R1))); -@@ -97,6 +114,7 @@ TEST_P(InstrAnalysisTest, IsUnconditionalBranch) { - - TEST_P(InstrAnalysisTest, IsIndirectBranch) { - EXPECT_FALSE(Analysis->isIndirectBranch(beq())); -+ EXPECT_FALSE(Analysis->isIndirectBranch(b())); - EXPECT_FALSE(Analysis->isIndirectBranch(bl())); - EXPECT_TRUE(Analysis->isIndirectBranch(jirl(LoongArch::R0))); - EXPECT_FALSE(Analysis->isIndirectBranch(jirl(LoongArch::R1))); --- -2.20.1 - - -From 034d4087be71c54248fff1bf7eae66291671776a Mon Sep 17 00:00:00 2001 -From: ZhaoQi -Date: Thu, 16 Nov 2023 14:01:58 +0800 -Subject: [PATCH 06/14] [LoongArch] Set isBarrier to true for instruction 'b' - (#72339) - -Instr "b offs26" represent to an unconditional branch in LoongArch. Set -isBarrier to 1 in tablegen for it, so that MCInstrAnalysis can return -correctly. - -Fixes https://github.com/llvm/llvm-project/pull/71903. - -(cherry picked from commit 42a4d5e8cab1537515d92ed56d6e17b673ed352f) ---- - llvm/lib/Target/LoongArch/LoongArchInstrInfo.td | 1 + - llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp | 8 ++------ - 2 files changed, 3 insertions(+), 6 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -index 166379d7d592..05ae36a9781d 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -@@ -586,6 +586,7 @@ class Br_I26 op> - : FmtI26 { - let isBranch = 1; - let isTerminator = 1; -+ let isBarrier = 1; - } - } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 - -diff --git a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp -index 6e1919fc2261..468ee79615d6 100644 ---- a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp -+++ b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp -@@ -94,17 +94,13 @@ TEST_P(InstrAnalysisTest, IsBranch) { - - TEST_P(InstrAnalysisTest, IsConditionalBranch) { - EXPECT_TRUE(Analysis->isConditionalBranch(beq())); -- // FIXME: Instr 'b' is not a ConditionalBranch, so the analysis here is -- // wrong. The following patch will fix it. -- EXPECT_TRUE(Analysis->isConditionalBranch(b())); -+ EXPECT_FALSE(Analysis->isConditionalBranch(b())); - EXPECT_FALSE(Analysis->isConditionalBranch(bl())); - } - - TEST_P(InstrAnalysisTest, IsUnconditionalBranch) { - EXPECT_FALSE(Analysis->isUnconditionalBranch(beq())); -- // FIXME: Instr 'b' is an UnconditionalBranch, so the analysis here is -- // wrong. The following patch will fix it. -- EXPECT_FALSE(Analysis->isUnconditionalBranch(b())); -+ EXPECT_TRUE(Analysis->isUnconditionalBranch(b())); - EXPECT_FALSE(Analysis->isUnconditionalBranch(bl())); - EXPECT_TRUE(Analysis->isUnconditionalBranch(jirl(LoongArch::R0))); - EXPECT_FALSE(Analysis->isUnconditionalBranch(jirl(LoongArch::R1))); --- -2.20.1 - - -From 701109dc419b8d07cd5254268d848dee1278b9ad Mon Sep 17 00:00:00 2001 -From: ZhaoQi -Date: Tue, 21 Nov 2023 08:34:52 +0800 -Subject: [PATCH 07/14] [LoongArch][MC] Pre-commit tests for instr bl fixupkind - testing (#72826) - -This patch is used to test whether fixupkind for bl can be returned -correctly. When BL has target-flags(loongarch-call), there is no error. -But without this flag, an assertion error will appear. 
So the test is -just tagged as "Expectedly Failed" now until the following patch fix it. - -(cherry picked from commit 2ca028ce7c6de5f1350440012355a65383b8729a) ---- - .../CodeGen/LoongArch/test_bl_fixupkind.mir | 66 +++++++++++++++++++ - 1 file changed, 66 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir - -diff --git a/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir b/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir -new file mode 100644 -index 000000000000..2c1d41be7711 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir -@@ -0,0 +1,66 @@ -+## Tagged as "Expectedly Failed" until the following patch fix it -+# XFAIL: * -+# RUN: llc --mtriple=loongarch64 --filetype=obj %s -o - | \ -+# RUN: llvm-objdump -d - | FileCheck %s -+ -+# REQUIRES: asserts -+ -+## Check that bl can get fixupkind correctly. -+## When BL has target-flags(loongarch-call), there is no error. But without -+## this flag, an assertion error will appear: -+## Assertion `FixupKind != LoongArch::fixup_loongarch_invalid && "Unhandled expression!"' failed. -+ -+--- | -+ target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" -+ target triple = "loongarch64" -+ -+ define dso_local void @test_bl_fixupkind_with_flag() { -+ ; CHECK-LABEL: test_bl_fixupkind_with_flag -+ ; CHECK: addi.d $sp, $sp, -16 -+ ; CHECK-NEXT: st.d $ra, $sp, 8 -+ ; CHECK-NEXT: bl 0 -+ ; CHECK-NEXT: ld.d $ra, $sp, 8 -+ ; CHECK-NEXT: addi.d $sp, $sp, 16 -+ ; CHECK-NEXT: ret -+ entry: -+ call void @foo() -+ ret void -+ } -+ -+ define dso_local void @test_bl_fixupkind_without_flag() { -+ ; CHECK-LABEL: test_bl_fixupkind_without_flag -+ ; CHECK: addi.d $sp, $sp, -16 -+ ; CHECK-NEXT: st.d $ra, $sp, 8 -+ ; CHECK-NEXT: bl 0 -+ ; CHECK-NEXT: ld.d $ra, $sp, 8 -+ ; CHECK-NEXT: addi.d $sp, $sp, 16 -+ ; CHECK-NEXT: ret -+ entry: -+ call void @foo() -+ ret void -+ } -+ -+ declare dso_local void @foo(...) -+... -+--- -+name: test_bl_fixupkind_with_flag -+tracksRegLiveness: true -+body: | -+ bb.0.entry: -+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $r3, implicit $r3 -+ BL target-flags(loongarch-call) @foo, csr_ilp32d_lp64d, implicit-def $r1, implicit-def dead $r1, implicit-def $r3 -+ ADJCALLSTACKUP 0, 0, implicit-def dead $r3, implicit $r3 -+ PseudoRET -+ -+... -+--- -+name: test_bl_fixupkind_without_flag -+tracksRegLiveness: true -+body: | -+ bb.0.entry: -+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $r3, implicit $r3 -+ BL @foo, csr_ilp32d_lp64d, implicit-def $r1, implicit-def dead $r1, implicit-def $r3 -+ ADJCALLSTACKUP 0, 0, implicit-def dead $r3, implicit $r3 -+ PseudoRET -+ -+... --- -2.20.1 - - -From a5bf03107b8738b0fab521d7718bed863056134b Mon Sep 17 00:00:00 2001 -From: ZhaoQi -Date: Tue, 21 Nov 2023 19:00:29 +0800 -Subject: [PATCH 08/14] [LoongArch][MC] Support to get the FixupKind for BL - (#72938) - -Previously, bolt could not get FixupKind for BL correctly, because bolt -cannot get target-flags for BL. Here just add support in MCCodeEmitter. - -Fixes https://github.com/llvm/llvm-project/pull/72826. 
- -(cherry picked from commit 775d2f3201cf7fb657aaf58d1b37c130bd9eb8f9) ---- - .../LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp | 1 + - llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir | 8 ++------ - 2 files changed, 3 insertions(+), 6 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp -index 08c0820cb862..09d92ac9aa3a 100644 ---- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp -+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp -@@ -263,6 +263,7 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO, - FixupKind = LoongArch::fixup_loongarch_b21; - break; - case LoongArch::B: -+ case LoongArch::BL: - FixupKind = LoongArch::fixup_loongarch_b26; - break; - } -diff --git a/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir b/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir -index 2c1d41be7711..70cd5fb8d7eb 100644 ---- a/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir -+++ b/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir -@@ -1,14 +1,10 @@ --## Tagged as "Expectedly Failed" until the following patch fix it --# XFAIL: * - # RUN: llc --mtriple=loongarch64 --filetype=obj %s -o - | \ - # RUN: llvm-objdump -d - | FileCheck %s - - # REQUIRES: asserts - --## Check that bl can get fixupkind correctly. --## When BL has target-flags(loongarch-call), there is no error. But without --## this flag, an assertion error will appear: --## Assertion `FixupKind != LoongArch::fixup_loongarch_invalid && "Unhandled expression!"' failed. -+## Check that bl can get fixupkind correctly, whether BL contains -+## target-flags(loongarch-call) or not. - - --- | - target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" --- -2.20.1 - - -From 20421e57af53d963a95c6c318f71f9399d241188 Mon Sep 17 00:00:00 2001 -From: ZhaoQi -Date: Thu, 23 Nov 2023 16:38:41 +0800 -Subject: [PATCH 09/14] [LoongArch][MC] Modify branch evaluation for - MCInstrAnalysis (#73205) - -Function evaluateBranch() is used to compute target address for a given -branch instruction and return true on success. But target address of -indirect branch cannot be simply added, so rule it out and just return -false. - -This patch also add objdump tests which capture the current state of -support for printing branch targets. Without this patch, the result of -"jirl $zero, $a0, 4" is "jirl $zero, $a0, 4 ". It is obviously -incorrect, because this instruction represents an indirect branch whose -target address depends on both the register value and the imm. After -this patch, it will be right despite loss of details. 
- -(cherry picked from commit 1c68c4c57a65a67963264878bc4646be8b58854c) ---- - .../MCTargetDesc/LoongArchMCTargetDesc.cpp | 3 +- - .../llvm-objdump/ELF/LoongArch/branches.s | 76 +++++++++++++++++++ - .../llvm-objdump/ELF/LoongArch/lit.local.cfg | 2 + - 3 files changed, 80 insertions(+), 1 deletion(-) - create mode 100644 llvm/test/tools/llvm-objdump/ELF/LoongArch/branches.s - create mode 100644 llvm/test/tools/llvm-objdump/ELF/LoongArch/lit.local.cfg - -diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp -index d580c3457fec..a4e6a09863e6 100644 ---- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp -+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp -@@ -97,7 +97,8 @@ public: - bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, - uint64_t &Target) const override { - unsigned NumOps = Inst.getNumOperands(); -- if (isBranch(Inst) || Inst.getOpcode() == LoongArch::BL) { -+ if ((isBranch(Inst) && !isIndirectBranch(Inst)) || -+ Inst.getOpcode() == LoongArch::BL) { - Target = Addr + Inst.getOperand(NumOps - 1).getImm(); - return true; - } -diff --git a/llvm/test/tools/llvm-objdump/ELF/LoongArch/branches.s b/llvm/test/tools/llvm-objdump/ELF/LoongArch/branches.s -new file mode 100644 -index 000000000000..8cb00aef9954 ---- /dev/null -+++ b/llvm/test/tools/llvm-objdump/ELF/LoongArch/branches.s -@@ -0,0 +1,76 @@ -+# RUN: llvm-mc --triple=loongarch32 --filetype=obj < %s | \ -+# RUN: llvm-objdump -d --no-show-raw-insn - | FileCheck %s -+# RUN: llvm-mc --triple=loongarch64 --filetype=obj < %s | \ -+# RUN: llvm-objdump -d --no-show-raw-insn - | FileCheck %s -+ -+# CHECK-LABEL: : -+foo: -+# CHECK: beq $a0, $a1, 108 -+beq $a0, $a1, .Llocal -+# CHECK: bne $a0, $a1, 104 -+bne $a0, $a1, .Llocal -+# CHECK: blt $a0, $a1, 100 -+blt $a0, $a1, .Llocal -+# CHECK: bltu $a0, $a1, 96 -+bltu $a0, $a1, .Llocal -+# CHECK: bge $a0, $a1, 92 -+bge $a0, $a1, .Llocal -+# CHECK: bgeu $a0, $a1, 88 -+bgeu $a0, $a1, .Llocal -+# CHECK: beqz $a0, 84 -+beqz $a0, .Llocal -+# CHECK: bnez $a0, 80 -+bnez $a0, .Llocal -+# CHECK: bceqz $fcc6, 76 -+bceqz $fcc6, .Llocal -+# CHECK: bcnez $fcc6, 72 -+bcnez $fcc6, .Llocal -+ -+# CHECK: beq $a0, $a1, 76 -+beq $a0, $a1, bar -+# CHECK: bne $a0, $a1, 72 -+bne $a0, $a1, bar -+# CHECK: blt $a0, $a1, 68 -+blt $a0, $a1, bar -+# CHECK: bltu $a0, $a1, 64 -+bltu $a0, $a1, bar -+# CHECK: bge $a0, $a1, 60 -+bge $a0, $a1, bar -+# CHECK: bgeu $a0, $a1, 56 -+bgeu $a0, $a1, bar -+# CHECK: beqz $a0, 52 -+beqz $a0, bar -+# CHECK: bnez $a0, 48 -+bnez $a0, bar -+# CHECK: bceqz $fcc6, 44 -+bceqz $fcc6, bar -+# CHECK: bcnez $fcc6, 40 -+bcnez $fcc6, bar -+ -+# CHECK: b 28 -+b .Llocal -+# CHECK: b 32 -+b bar -+ -+# CHECK: bl 20 -+bl .Llocal -+# CHECK: bl 24 -+bl bar -+ -+# CHECK: jirl $zero, $a0, 4{{$}} -+jirl $zero, $a0, 4 -+# CHECK: jirl $ra, $a0, 4{{$}} -+jirl $ra, $a0, 4 -+# CHECK: ret -+ret -+ -+.Llocal: -+# CHECK: 6c: nop -+# CHECK: nop -+nop -+nop -+ -+# CHECK-LABEL: : -+bar: -+# CHECK: 74: nop -+nop -diff --git a/llvm/test/tools/llvm-objdump/ELF/LoongArch/lit.local.cfg b/llvm/test/tools/llvm-objdump/ELF/LoongArch/lit.local.cfg -new file mode 100644 -index 000000000000..cc24278acbb4 ---- /dev/null -+++ b/llvm/test/tools/llvm-objdump/ELF/LoongArch/lit.local.cfg -@@ -0,0 +1,2 @@ -+if not "LoongArch" in config.root.targets: -+ config.unsupported = True --- -2.20.1 - - -From 0fe85205a8637c6671f423cddd41b712085232ac Mon Sep 17 00:00:00 2001 -From: hev -Date: Thu, 23 Nov 
2023 15:15:26 +0800 -Subject: [PATCH 10/14] [LoongArch] Precommit a test for smul with overflow - (NFC) (#73212) - -(cherry picked from commit 7414c0db962f8a5029fd44c3e0bc93d9ce20be71) ---- - .../CodeGen/LoongArch/smul-with-overflow.ll | 118 ++++++++++++++++++ - 1 file changed, 118 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/smul-with-overflow.ll - -diff --git a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll -new file mode 100644 -index 000000000000..a53e77e5aa4b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll -@@ -0,0 +1,118 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 -+; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 -+ -+define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) { -+; LA32-LABEL: smuloi64: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $sp, $sp, -16 -+; LA32-NEXT: .cfi_def_cfa_offset 16 -+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -+; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill -+; LA32-NEXT: .cfi_offset 1, -4 -+; LA32-NEXT: .cfi_offset 22, -8 -+; LA32-NEXT: move $fp, $a4 -+; LA32-NEXT: st.w $zero, $sp, 4 -+; LA32-NEXT: addi.w $a4, $sp, 4 -+; LA32-NEXT: bl %plt(__mulodi4) -+; LA32-NEXT: st.w $a1, $fp, 4 -+; LA32-NEXT: st.w $a0, $fp, 0 -+; LA32-NEXT: ld.w $a0, $sp, 4 -+; LA32-NEXT: sltu $a0, $zero, $a0 -+; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload -+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -+; LA32-NEXT: addi.w $sp, $sp, 16 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: smuloi64: -+; LA64: # %bb.0: -+; LA64-NEXT: mul.d $a3, $a0, $a1 -+; LA64-NEXT: st.d $a3, $a2, 0 -+; LA64-NEXT: mulh.d $a0, $a0, $a1 -+; LA64-NEXT: srai.d $a1, $a3, 63 -+; LA64-NEXT: xor $a0, $a0, $a1 -+; LA64-NEXT: sltu $a0, $zero, $a0 -+; LA64-NEXT: ret -+ %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) -+ %val = extractvalue {i64, i1} %t, 0 -+ %obit = extractvalue {i64, i1} %t, 1 -+ store i64 %val, ptr %res -+ ret i1 %obit -+} -+ -+define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) { -+; LA32-LABEL: smuloi128: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $sp, $sp, -64 -+; LA32-NEXT: .cfi_def_cfa_offset 64 -+; LA32-NEXT: st.w $ra, $sp, 60 # 4-byte Folded Spill -+; LA32-NEXT: st.w $fp, $sp, 56 # 4-byte Folded Spill -+; LA32-NEXT: .cfi_offset 1, -4 -+; LA32-NEXT: .cfi_offset 22, -8 -+; LA32-NEXT: move $fp, $a2 -+; LA32-NEXT: st.w $zero, $sp, 52 -+; LA32-NEXT: ld.w $a2, $a1, 12 -+; LA32-NEXT: st.w $a2, $sp, 12 -+; LA32-NEXT: ld.w $a2, $a1, 8 -+; LA32-NEXT: st.w $a2, $sp, 8 -+; LA32-NEXT: ld.w $a2, $a1, 4 -+; LA32-NEXT: st.w $a2, $sp, 4 -+; LA32-NEXT: ld.w $a1, $a1, 0 -+; LA32-NEXT: st.w $a1, $sp, 0 -+; LA32-NEXT: ld.w $a1, $a0, 12 -+; LA32-NEXT: st.w $a1, $sp, 28 -+; LA32-NEXT: ld.w $a1, $a0, 8 -+; LA32-NEXT: st.w $a1, $sp, 24 -+; LA32-NEXT: ld.w $a1, $a0, 4 -+; LA32-NEXT: st.w $a1, $sp, 20 -+; LA32-NEXT: ld.w $a0, $a0, 0 -+; LA32-NEXT: st.w $a0, $sp, 16 -+; LA32-NEXT: addi.w $a0, $sp, 32 -+; LA32-NEXT: addi.w $a1, $sp, 16 -+; LA32-NEXT: addi.w $a2, $sp, 0 -+; LA32-NEXT: addi.w $a3, $sp, 52 -+; LA32-NEXT: bl %plt(__muloti4) -+; LA32-NEXT: ld.w $a0, $sp, 44 -+; LA32-NEXT: st.w $a0, $fp, 12 -+; LA32-NEXT: ld.w $a0, $sp, 40 -+; LA32-NEXT: st.w $a0, $fp, 8 -+; LA32-NEXT: ld.w $a0, $sp, 36 -+; LA32-NEXT: st.w $a0, $fp, 4 -+; LA32-NEXT: ld.w $a0, $sp, 32 -+; LA32-NEXT: st.w $a0, $fp, 0 -+; LA32-NEXT: ld.w $a0, $sp, 52 -+; LA32-NEXT: sltu 
$a0, $zero, $a0 -+; LA32-NEXT: ld.w $fp, $sp, 56 # 4-byte Folded Reload -+; LA32-NEXT: ld.w $ra, $sp, 60 # 4-byte Folded Reload -+; LA32-NEXT: addi.w $sp, $sp, 64 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: smuloi128: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.d $sp, $sp, -32 -+; LA64-NEXT: .cfi_def_cfa_offset 32 -+; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill -+; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill -+; LA64-NEXT: .cfi_offset 1, -8 -+; LA64-NEXT: .cfi_offset 22, -16 -+; LA64-NEXT: move $fp, $a4 -+; LA64-NEXT: st.d $zero, $sp, 8 -+; LA64-NEXT: addi.d $a4, $sp, 8 -+; LA64-NEXT: bl %plt(__muloti4) -+; LA64-NEXT: st.d $a1, $fp, 8 -+; LA64-NEXT: st.d $a0, $fp, 0 -+; LA64-NEXT: ld.d $a0, $sp, 8 -+; LA64-NEXT: sltu $a0, $zero, $a0 -+; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload -+; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload -+; LA64-NEXT: addi.d $sp, $sp, 32 -+; LA64-NEXT: ret -+ %t = call {i128, i1} @llvm.smul.with.overflow.i128(i128 %v1, i128 %v2) -+ %val = extractvalue {i128, i1} %t, 0 -+ %obit = extractvalue {i128, i1} %t, 1 -+ store i128 %val, ptr %res -+ ret i1 %obit -+} -+ -+declare {i64, i1} @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone -+declare {i128, i1} @llvm.smul.with.overflow.i128(i128, i128) nounwind readnone --- -2.20.1 - - -From e29ff285726046ec46c9005c67ba992e3efc8ace Mon Sep 17 00:00:00 2001 -From: hev -Date: Thu, 23 Nov 2023 19:34:50 +0800 -Subject: [PATCH 11/14] [LoongArch] Disable mulodi4 and muloti4 libcalls - (#73199) - -This library function only exists in compiler-rt not libgcc. So this -would fail to link unless we were linking with compiler-rt. - -Fixes https://github.com/ClangBuiltLinux/linux/issues/1958 - -(cherry picked from commit 0d9f557b6c36da3aa92daff4c0d37ea821d7ae1e) ---- - .../LoongArch/LoongArchISelLowering.cpp | 5 + - .../CodeGen/LoongArch/smul-with-overflow.ll | 463 +++++++++++++++--- - 2 files changed, 397 insertions(+), 71 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index f7eacd56c542..ed106cb766bc 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -152,8 +152,13 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - - // Set libcalls. - setLibcallName(RTLIB::MUL_I128, nullptr); -+ // The MULO libcall is not part of libgcc, only compiler-rt. -+ setLibcallName(RTLIB::MULO_I64, nullptr); - } - -+ // The MULO libcall is not part of libgcc, only compiler-rt. 
-+ setLibcallName(RTLIB::MULO_I128, nullptr); -+ - static const ISD::CondCode FPCCToExpand[] = { - ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE, - ISD::SETGE, ISD::SETNE, ISD::SETGT}; -diff --git a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll -index a53e77e5aa4b..6cba4108d63c 100644 ---- a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll -+++ b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll -@@ -5,23 +5,53 @@ - define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) { - ; LA32-LABEL: smuloi64: - ; LA32: # %bb.0: --; LA32-NEXT: addi.w $sp, $sp, -16 --; LA32-NEXT: .cfi_def_cfa_offset 16 --; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill --; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill --; LA32-NEXT: .cfi_offset 1, -4 --; LA32-NEXT: .cfi_offset 22, -8 --; LA32-NEXT: move $fp, $a4 --; LA32-NEXT: st.w $zero, $sp, 4 --; LA32-NEXT: addi.w $a4, $sp, 4 --; LA32-NEXT: bl %plt(__mulodi4) --; LA32-NEXT: st.w $a1, $fp, 4 --; LA32-NEXT: st.w $a0, $fp, 0 --; LA32-NEXT: ld.w $a0, $sp, 4 -+; LA32-NEXT: srai.w $a5, $a1, 31 -+; LA32-NEXT: mul.w $a6, $a2, $a5 -+; LA32-NEXT: mulh.wu $a7, $a2, $a5 -+; LA32-NEXT: add.w $a7, $a7, $a6 -+; LA32-NEXT: mul.w $a5, $a3, $a5 -+; LA32-NEXT: add.w $a5, $a7, $a5 -+; LA32-NEXT: srai.w $a7, $a3, 31 -+; LA32-NEXT: mul.w $t0, $a7, $a1 -+; LA32-NEXT: mulh.wu $t1, $a7, $a0 -+; LA32-NEXT: add.w $t0, $t1, $t0 -+; LA32-NEXT: mul.w $a7, $a7, $a0 -+; LA32-NEXT: add.w $t0, $t0, $a7 -+; LA32-NEXT: add.w $a5, $t0, $a5 -+; LA32-NEXT: mulh.wu $t0, $a0, $a2 -+; LA32-NEXT: mul.w $t1, $a1, $a2 -+; LA32-NEXT: add.w $t0, $t1, $t0 -+; LA32-NEXT: sltu $t1, $t0, $t1 -+; LA32-NEXT: mulh.wu $t2, $a1, $a2 -+; LA32-NEXT: add.w $t1, $t2, $t1 -+; LA32-NEXT: mul.w $t2, $a0, $a3 -+; LA32-NEXT: add.w $t0, $t2, $t0 -+; LA32-NEXT: sltu $t2, $t0, $t2 -+; LA32-NEXT: mulh.wu $t3, $a0, $a3 -+; LA32-NEXT: add.w $t2, $t3, $t2 -+; LA32-NEXT: add.w $a6, $a7, $a6 -+; LA32-NEXT: sltu $a7, $a6, $a7 -+; LA32-NEXT: add.w $a5, $a5, $a7 -+; LA32-NEXT: mul.w $a0, $a0, $a2 -+; LA32-NEXT: mul.w $a2, $a1, $a3 -+; LA32-NEXT: mulh.wu $a1, $a1, $a3 -+; LA32-NEXT: add.w $a3, $t1, $t2 -+; LA32-NEXT: sltu $a7, $a3, $t1 -+; LA32-NEXT: add.w $a1, $a1, $a7 -+; LA32-NEXT: st.w $a0, $a4, 0 -+; LA32-NEXT: add.w $a0, $a2, $a3 -+; LA32-NEXT: sltu $a2, $a0, $a2 -+; LA32-NEXT: add.w $a1, $a1, $a2 -+; LA32-NEXT: st.w $t0, $a4, 4 -+; LA32-NEXT: add.w $a1, $a1, $a5 -+; LA32-NEXT: add.w $a2, $a0, $a6 -+; LA32-NEXT: sltu $a0, $a2, $a0 -+; LA32-NEXT: add.w $a0, $a1, $a0 -+; LA32-NEXT: srai.w $a1, $t0, 31 -+; LA32-NEXT: xor $a0, $a0, $a1 -+; LA32-NEXT: xor $a1, $a2, $a1 -+; LA32-NEXT: or $a0, $a1, $a0 - ; LA32-NEXT: sltu $a0, $zero, $a0 --; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload --; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload --; LA32-NEXT: addi.w $sp, $sp, 16 - ; LA32-NEXT: ret - ; - ; LA64-LABEL: smuloi64: -@@ -43,69 +73,360 @@ define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) { - define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) { - ; LA32-LABEL: smuloi128: - ; LA32: # %bb.0: --; LA32-NEXT: addi.w $sp, $sp, -64 --; LA32-NEXT: .cfi_def_cfa_offset 64 --; LA32-NEXT: st.w $ra, $sp, 60 # 4-byte Folded Spill --; LA32-NEXT: st.w $fp, $sp, 56 # 4-byte Folded Spill -+; LA32-NEXT: addi.w $sp, $sp, -96 -+; LA32-NEXT: .cfi_def_cfa_offset 96 -+; LA32-NEXT: st.w $ra, $sp, 92 # 4-byte Folded Spill -+; LA32-NEXT: st.w $fp, $sp, 88 # 4-byte Folded Spill -+; LA32-NEXT: st.w $s0, $sp, 84 # 4-byte Folded Spill -+; LA32-NEXT: st.w $s1, $sp, 80 # 4-byte 
Folded Spill -+; LA32-NEXT: st.w $s2, $sp, 76 # 4-byte Folded Spill -+; LA32-NEXT: st.w $s3, $sp, 72 # 4-byte Folded Spill -+; LA32-NEXT: st.w $s4, $sp, 68 # 4-byte Folded Spill -+; LA32-NEXT: st.w $s5, $sp, 64 # 4-byte Folded Spill -+; LA32-NEXT: st.w $s6, $sp, 60 # 4-byte Folded Spill -+; LA32-NEXT: st.w $s7, $sp, 56 # 4-byte Folded Spill -+; LA32-NEXT: st.w $s8, $sp, 52 # 4-byte Folded Spill - ; LA32-NEXT: .cfi_offset 1, -4 - ; LA32-NEXT: .cfi_offset 22, -8 --; LA32-NEXT: move $fp, $a2 --; LA32-NEXT: st.w $zero, $sp, 52 --; LA32-NEXT: ld.w $a2, $a1, 12 --; LA32-NEXT: st.w $a2, $sp, 12 --; LA32-NEXT: ld.w $a2, $a1, 8 --; LA32-NEXT: st.w $a2, $sp, 8 --; LA32-NEXT: ld.w $a2, $a1, 4 --; LA32-NEXT: st.w $a2, $sp, 4 --; LA32-NEXT: ld.w $a1, $a1, 0 --; LA32-NEXT: st.w $a1, $sp, 0 --; LA32-NEXT: ld.w $a1, $a0, 12 --; LA32-NEXT: st.w $a1, $sp, 28 --; LA32-NEXT: ld.w $a1, $a0, 8 --; LA32-NEXT: st.w $a1, $sp, 24 --; LA32-NEXT: ld.w $a1, $a0, 4 --; LA32-NEXT: st.w $a1, $sp, 20 --; LA32-NEXT: ld.w $a0, $a0, 0 --; LA32-NEXT: st.w $a0, $sp, 16 --; LA32-NEXT: addi.w $a0, $sp, 32 --; LA32-NEXT: addi.w $a1, $sp, 16 --; LA32-NEXT: addi.w $a2, $sp, 0 --; LA32-NEXT: addi.w $a3, $sp, 52 --; LA32-NEXT: bl %plt(__muloti4) --; LA32-NEXT: ld.w $a0, $sp, 44 --; LA32-NEXT: st.w $a0, $fp, 12 --; LA32-NEXT: ld.w $a0, $sp, 40 --; LA32-NEXT: st.w $a0, $fp, 8 --; LA32-NEXT: ld.w $a0, $sp, 36 --; LA32-NEXT: st.w $a0, $fp, 4 --; LA32-NEXT: ld.w $a0, $sp, 32 --; LA32-NEXT: st.w $a0, $fp, 0 --; LA32-NEXT: ld.w $a0, $sp, 52 -+; LA32-NEXT: .cfi_offset 23, -12 -+; LA32-NEXT: .cfi_offset 24, -16 -+; LA32-NEXT: .cfi_offset 25, -20 -+; LA32-NEXT: .cfi_offset 26, -24 -+; LA32-NEXT: .cfi_offset 27, -28 -+; LA32-NEXT: .cfi_offset 28, -32 -+; LA32-NEXT: .cfi_offset 29, -36 -+; LA32-NEXT: .cfi_offset 30, -40 -+; LA32-NEXT: .cfi_offset 31, -44 -+; LA32-NEXT: st.w $a2, $sp, 12 # 4-byte Folded Spill -+; LA32-NEXT: ld.w $a6, $a1, 0 -+; LA32-NEXT: ld.w $a7, $a0, 0 -+; LA32-NEXT: mulh.wu $a3, $a7, $a6 -+; LA32-NEXT: ld.w $a5, $a0, 4 -+; LA32-NEXT: mul.w $a4, $a5, $a6 -+; LA32-NEXT: add.w $a3, $a4, $a3 -+; LA32-NEXT: sltu $a4, $a3, $a4 -+; LA32-NEXT: mulh.wu $t0, $a5, $a6 -+; LA32-NEXT: add.w $a4, $t0, $a4 -+; LA32-NEXT: ld.w $t0, $a1, 4 -+; LA32-NEXT: mul.w $t1, $a7, $t0 -+; LA32-NEXT: add.w $a3, $t1, $a3 -+; LA32-NEXT: st.w $a3, $sp, 44 # 4-byte Folded Spill -+; LA32-NEXT: sltu $t1, $a3, $t1 -+; LA32-NEXT: mulh.wu $t2, $a7, $t0 -+; LA32-NEXT: add.w $t1, $t2, $t1 -+; LA32-NEXT: ld.w $t4, $a0, 12 -+; LA32-NEXT: ld.w $t2, $a0, 8 -+; LA32-NEXT: ld.w $t3, $a1, 8 -+; LA32-NEXT: mulh.wu $a0, $t2, $t3 -+; LA32-NEXT: mul.w $t5, $t4, $t3 -+; LA32-NEXT: add.w $a0, $t5, $a0 -+; LA32-NEXT: sltu $t5, $a0, $t5 -+; LA32-NEXT: mulh.wu $t6, $t4, $t3 -+; LA32-NEXT: add.w $t5, $t6, $t5 -+; LA32-NEXT: ld.w $t7, $a1, 12 -+; LA32-NEXT: mul.w $a1, $t2, $t7 -+; LA32-NEXT: add.w $a0, $a1, $a0 -+; LA32-NEXT: st.w $a0, $sp, 48 # 4-byte Folded Spill -+; LA32-NEXT: sltu $a1, $a0, $a1 -+; LA32-NEXT: mulh.wu $t6, $t2, $t7 -+; LA32-NEXT: add.w $t6, $t6, $a1 -+; LA32-NEXT: srai.w $s7, $t4, 31 -+; LA32-NEXT: mul.w $a1, $s7, $t7 -+; LA32-NEXT: mulh.wu $t8, $s7, $t3 -+; LA32-NEXT: add.w $t8, $t8, $a1 -+; LA32-NEXT: mulh.wu $fp, $a6, $s7 -+; LA32-NEXT: mul.w $s6, $t0, $s7 -+; LA32-NEXT: add.w $s8, $s6, $fp -+; LA32-NEXT: mul.w $a1, $a6, $s7 -+; LA32-NEXT: add.w $ra, $a1, $s8 -+; LA32-NEXT: sltu $s0, $ra, $a1 -+; LA32-NEXT: add.w $a0, $fp, $s0 -+; LA32-NEXT: add.w $a3, $a4, $t1 -+; LA32-NEXT: st.w $a3, $sp, 20 # 4-byte Folded Spill -+; LA32-NEXT: sltu $a4, $a3, $a4 -+; 
LA32-NEXT: mulh.wu $t1, $a5, $t0 -+; LA32-NEXT: add.w $a3, $t1, $a4 -+; LA32-NEXT: st.w $a3, $sp, 28 # 4-byte Folded Spill -+; LA32-NEXT: srai.w $s4, $t7, 31 -+; LA32-NEXT: mul.w $fp, $a7, $s4 -+; LA32-NEXT: mulh.wu $a4, $a7, $s4 -+; LA32-NEXT: add.w $s1, $a4, $fp -+; LA32-NEXT: sltu $s0, $s1, $fp -+; LA32-NEXT: add.w $s5, $a4, $s0 -+; LA32-NEXT: mul.w $a4, $s7, $t3 -+; LA32-NEXT: add.w $t8, $t8, $a4 -+; LA32-NEXT: add.w $s0, $ra, $t8 -+; LA32-NEXT: add.w $a3, $a1, $a4 -+; LA32-NEXT: st.w $a3, $sp, 32 # 4-byte Folded Spill -+; LA32-NEXT: sltu $a4, $a3, $a1 -+; LA32-NEXT: add.w $a3, $s0, $a4 -+; LA32-NEXT: st.w $a3, $sp, 24 # 4-byte Folded Spill -+; LA32-NEXT: add.w $s3, $t5, $t6 -+; LA32-NEXT: sltu $a4, $s3, $t5 -+; LA32-NEXT: mulh.wu $t5, $t4, $t7 -+; LA32-NEXT: add.w $a3, $t5, $a4 -+; LA32-NEXT: st.w $a3, $sp, 16 # 4-byte Folded Spill -+; LA32-NEXT: mul.w $a4, $a7, $a6 -+; LA32-NEXT: st.w $a4, $a2, 0 -+; LA32-NEXT: sltu $a4, $s8, $s6 -+; LA32-NEXT: mulh.wu $t5, $t0, $s7 -+; LA32-NEXT: add.w $a4, $t5, $a4 -+; LA32-NEXT: add.w $t1, $a4, $a0 -+; LA32-NEXT: sltu $a4, $t1, $a4 -+; LA32-NEXT: add.w $s2, $t5, $a4 -+; LA32-NEXT: mulh.wu $a4, $a7, $t3 -+; LA32-NEXT: mul.w $t5, $a5, $t3 -+; LA32-NEXT: add.w $a4, $t5, $a4 -+; LA32-NEXT: sltu $t5, $a4, $t5 -+; LA32-NEXT: mulh.wu $t6, $a5, $t3 -+; LA32-NEXT: add.w $a3, $t6, $t5 -+; LA32-NEXT: mul.w $t6, $a7, $t7 -+; LA32-NEXT: add.w $t5, $t6, $a4 -+; LA32-NEXT: sltu $a4, $t5, $t6 -+; LA32-NEXT: mulh.wu $t6, $a7, $t7 -+; LA32-NEXT: add.w $a4, $t6, $a4 -+; LA32-NEXT: mulh.wu $t6, $t2, $a6 -+; LA32-NEXT: mul.w $s7, $t4, $a6 -+; LA32-NEXT: add.w $t6, $s7, $t6 -+; LA32-NEXT: sltu $s7, $t6, $s7 -+; LA32-NEXT: mulh.wu $s8, $t4, $a6 -+; LA32-NEXT: add.w $a0, $s8, $s7 -+; LA32-NEXT: mul.w $s7, $t2, $t0 -+; LA32-NEXT: add.w $t6, $s7, $t6 -+; LA32-NEXT: sltu $s7, $t6, $s7 -+; LA32-NEXT: mulh.wu $s8, $t2, $t0 -+; LA32-NEXT: add.w $a2, $s8, $s7 -+; LA32-NEXT: mul.w $s8, $a5, $s4 -+; LA32-NEXT: add.w $s7, $s1, $s8 -+; LA32-NEXT: add.w $s1, $s7, $ra -+; LA32-NEXT: add.w $a1, $fp, $a1 -+; LA32-NEXT: st.w $a1, $sp, 40 # 4-byte Folded Spill -+; LA32-NEXT: sltu $ra, $a1, $fp -+; LA32-NEXT: add.w $a1, $s1, $ra -+; LA32-NEXT: st.w $a1, $sp, 36 # 4-byte Folded Spill -+; LA32-NEXT: xor $s0, $a1, $s7 -+; LA32-NEXT: sltui $s0, $s0, 1 -+; LA32-NEXT: sltu $a1, $a1, $s7 -+; LA32-NEXT: masknez $s1, $a1, $s0 -+; LA32-NEXT: maskeqz $s0, $ra, $s0 -+; LA32-NEXT: add.w $t1, $s6, $t1 -+; LA32-NEXT: sltu $s6, $t1, $s6 -+; LA32-NEXT: add.w $s2, $s2, $s6 -+; LA32-NEXT: add.w $a2, $a0, $a2 -+; LA32-NEXT: sltu $a0, $a2, $a0 -+; LA32-NEXT: mulh.wu $s6, $t4, $t0 -+; LA32-NEXT: add.w $t8, $s6, $a0 -+; LA32-NEXT: add.w $a4, $a3, $a4 -+; LA32-NEXT: sltu $a3, $a4, $a3 -+; LA32-NEXT: mulh.wu $s6, $a5, $t7 -+; LA32-NEXT: add.w $a3, $s6, $a3 -+; LA32-NEXT: mul.w $s6, $t4, $t7 -+; LA32-NEXT: mul.w $t7, $a5, $t7 -+; LA32-NEXT: mul.w $ra, $t4, $t0 -+; LA32-NEXT: mul.w $t0, $a5, $t0 -+; LA32-NEXT: mul.w $t4, $t4, $s4 -+; LA32-NEXT: mul.w $a7, $a7, $t3 -+; LA32-NEXT: mul.w $a6, $t2, $a6 -+; LA32-NEXT: mul.w $t3, $t2, $t3 -+; LA32-NEXT: mul.w $a0, $t2, $s4 -+; LA32-NEXT: mulh.wu $t2, $t2, $s4 -+; LA32-NEXT: mulh.wu $a5, $s4, $a5 -+; LA32-NEXT: sltu $s4, $s7, $s8 -+; LA32-NEXT: add.w $s4, $a5, $s4 -+; LA32-NEXT: add.w $s4, $s5, $s4 -+; LA32-NEXT: sltu $s5, $s4, $s5 -+; LA32-NEXT: add.w $s5, $a5, $s5 -+; LA32-NEXT: ld.w $a1, $sp, 20 # 4-byte Folded Reload -+; LA32-NEXT: add.w $a1, $t0, $a1 -+; LA32-NEXT: sltu $a5, $a1, $t0 -+; LA32-NEXT: ld.w $t0, $sp, 28 # 4-byte Folded Reload -+; LA32-NEXT: add.w $t0, 
$t0, $a5 -+; LA32-NEXT: or $s0, $s0, $s1 -+; LA32-NEXT: add.w $a4, $t7, $a4 -+; LA32-NEXT: sltu $a5, $a4, $t7 -+; LA32-NEXT: add.w $t7, $a3, $a5 -+; LA32-NEXT: add.w $s1, $ra, $a2 -+; LA32-NEXT: sltu $a2, $s1, $ra -+; LA32-NEXT: add.w $t8, $t8, $a2 -+; LA32-NEXT: add.w $a5, $s6, $s3 -+; LA32-NEXT: sltu $a2, $a5, $s6 -+; LA32-NEXT: ld.w $a3, $sp, 16 # 4-byte Folded Reload -+; LA32-NEXT: add.w $a2, $a3, $a2 -+; LA32-NEXT: ld.w $s6, $sp, 12 # 4-byte Folded Reload -+; LA32-NEXT: ld.w $a3, $sp, 44 # 4-byte Folded Reload -+; LA32-NEXT: st.w $a3, $s6, 4 -+; LA32-NEXT: ld.w $a3, $sp, 24 # 4-byte Folded Reload -+; LA32-NEXT: add.w $a3, $s2, $a3 -+; LA32-NEXT: ld.w $s2, $sp, 32 # 4-byte Folded Reload -+; LA32-NEXT: add.w $s2, $t1, $s2 -+; LA32-NEXT: sltu $t1, $s2, $t1 -+; LA32-NEXT: add.w $a3, $a3, $t1 -+; LA32-NEXT: add.w $t1, $s8, $s4 -+; LA32-NEXT: sltu $s3, $t1, $s8 -+; LA32-NEXT: add.w $s3, $s5, $s3 -+; LA32-NEXT: add.w $t2, $t2, $a0 -+; LA32-NEXT: add.w $t2, $t2, $t4 -+; LA32-NEXT: add.w $t2, $t2, $s7 -+; LA32-NEXT: add.w $t4, $a0, $fp -+; LA32-NEXT: sltu $a0, $t4, $a0 -+; LA32-NEXT: add.w $a0, $t2, $a0 -+; LA32-NEXT: add.w $a0, $s3, $a0 -+; LA32-NEXT: add.w $t2, $t1, $t4 -+; LA32-NEXT: sltu $t1, $t2, $t1 -+; LA32-NEXT: add.w $a0, $a0, $t1 -+; LA32-NEXT: add.w $a0, $a0, $a3 -+; LA32-NEXT: add.w $t1, $t2, $s2 -+; LA32-NEXT: sltu $a3, $t1, $t2 -+; LA32-NEXT: add.w $a0, $a0, $a3 -+; LA32-NEXT: add.w $a3, $t6, $t0 -+; LA32-NEXT: add.w $a1, $a6, $a1 -+; LA32-NEXT: sltu $a6, $a1, $a6 -+; LA32-NEXT: add.w $t0, $a3, $a6 -+; LA32-NEXT: add.w $a1, $a7, $a1 -+; LA32-NEXT: sltu $a7, $a1, $a7 -+; LA32-NEXT: add.w $a3, $t5, $t0 -+; LA32-NEXT: add.w $a3, $a3, $a7 -+; LA32-NEXT: sltu $t2, $a3, $t5 -+; LA32-NEXT: xor $t4, $a3, $t5 -+; LA32-NEXT: sltui $t4, $t4, 1 -+; LA32-NEXT: masknez $t2, $t2, $t4 -+; LA32-NEXT: maskeqz $a7, $a7, $t4 -+; LA32-NEXT: st.w $a1, $s6, 8 -+; LA32-NEXT: or $a1, $a7, $t2 -+; LA32-NEXT: sltu $a7, $t0, $t6 -+; LA32-NEXT: xor $t0, $t0, $t6 -+; LA32-NEXT: sltui $t0, $t0, 1 -+; LA32-NEXT: masknez $a7, $a7, $t0 -+; LA32-NEXT: maskeqz $a6, $a6, $t0 -+; LA32-NEXT: or $a6, $a6, $a7 -+; LA32-NEXT: add.w $a6, $s1, $a6 -+; LA32-NEXT: sltu $a7, $a6, $s1 -+; LA32-NEXT: add.w $a7, $t8, $a7 -+; LA32-NEXT: add.w $a1, $a4, $a1 -+; LA32-NEXT: sltu $a4, $a1, $a4 -+; LA32-NEXT: add.w $a4, $t7, $a4 -+; LA32-NEXT: add.w $t0, $t1, $s0 -+; LA32-NEXT: sltu $t1, $t0, $t1 -+; LA32-NEXT: add.w $a0, $a0, $t1 -+; LA32-NEXT: st.w $a3, $s6, 12 -+; LA32-NEXT: add.w $a1, $a6, $a1 -+; LA32-NEXT: sltu $a6, $a1, $a6 -+; LA32-NEXT: add.w $a4, $a7, $a4 -+; LA32-NEXT: add.w $a4, $a4, $a6 -+; LA32-NEXT: sltu $t1, $a4, $a7 -+; LA32-NEXT: xor $a7, $a4, $a7 -+; LA32-NEXT: sltui $a7, $a7, 1 -+; LA32-NEXT: masknez $t1, $t1, $a7 -+; LA32-NEXT: maskeqz $a6, $a6, $a7 -+; LA32-NEXT: or $a6, $a6, $t1 -+; LA32-NEXT: add.w $a6, $a5, $a6 -+; LA32-NEXT: sltu $a5, $a6, $a5 -+; LA32-NEXT: add.w $a2, $a2, $a5 -+; LA32-NEXT: ld.w $t1, $sp, 48 # 4-byte Folded Reload -+; LA32-NEXT: add.w $a4, $t1, $a4 -+; LA32-NEXT: add.w $a1, $t3, $a1 -+; LA32-NEXT: sltu $a5, $a1, $t3 -+; LA32-NEXT: add.w $a4, $a4, $a5 -+; LA32-NEXT: sltu $a7, $a4, $t1 -+; LA32-NEXT: xor $t1, $a4, $t1 -+; LA32-NEXT: sltui $t1, $t1, 1 -+; LA32-NEXT: masknez $a7, $a7, $t1 -+; LA32-NEXT: maskeqz $a5, $a5, $t1 -+; LA32-NEXT: or $a5, $a5, $a7 -+; LA32-NEXT: add.w $a5, $a6, $a5 -+; LA32-NEXT: sltu $a6, $a5, $a6 -+; LA32-NEXT: add.w $a2, $a2, $a6 -+; LA32-NEXT: add.w $a0, $a2, $a0 -+; LA32-NEXT: add.w $a2, $a5, $t0 -+; LA32-NEXT: sltu $a5, $a2, $a5 -+; LA32-NEXT: add.w $a0, $a0, $a5 
-+; LA32-NEXT: ld.w $a5, $sp, 40 # 4-byte Folded Reload -+; LA32-NEXT: add.w $a5, $a1, $a5 -+; LA32-NEXT: sltu $a1, $a5, $a1 -+; LA32-NEXT: ld.w $a6, $sp, 36 # 4-byte Folded Reload -+; LA32-NEXT: add.w $a6, $a4, $a6 -+; LA32-NEXT: add.w $a6, $a6, $a1 -+; LA32-NEXT: sltu $a7, $a6, $a4 -+; LA32-NEXT: xor $a4, $a6, $a4 -+; LA32-NEXT: sltui $a4, $a4, 1 -+; LA32-NEXT: masknez $a7, $a7, $a4 -+; LA32-NEXT: maskeqz $a1, $a1, $a4 -+; LA32-NEXT: or $a1, $a1, $a7 -+; LA32-NEXT: add.w $a1, $a2, $a1 -+; LA32-NEXT: sltu $a2, $a1, $a2 -+; LA32-NEXT: add.w $a0, $a0, $a2 -+; LA32-NEXT: srai.w $a2, $a3, 31 -+; LA32-NEXT: xor $a3, $a6, $a2 -+; LA32-NEXT: xor $a0, $a0, $a2 -+; LA32-NEXT: or $a0, $a3, $a0 -+; LA32-NEXT: xor $a3, $a5, $a2 -+; LA32-NEXT: xor $a1, $a1, $a2 -+; LA32-NEXT: or $a1, $a3, $a1 -+; LA32-NEXT: or $a0, $a1, $a0 - ; LA32-NEXT: sltu $a0, $zero, $a0 --; LA32-NEXT: ld.w $fp, $sp, 56 # 4-byte Folded Reload --; LA32-NEXT: ld.w $ra, $sp, 60 # 4-byte Folded Reload --; LA32-NEXT: addi.w $sp, $sp, 64 -+; LA32-NEXT: ld.w $s8, $sp, 52 # 4-byte Folded Reload -+; LA32-NEXT: ld.w $s7, $sp, 56 # 4-byte Folded Reload -+; LA32-NEXT: ld.w $s6, $sp, 60 # 4-byte Folded Reload -+; LA32-NEXT: ld.w $s5, $sp, 64 # 4-byte Folded Reload -+; LA32-NEXT: ld.w $s4, $sp, 68 # 4-byte Folded Reload -+; LA32-NEXT: ld.w $s3, $sp, 72 # 4-byte Folded Reload -+; LA32-NEXT: ld.w $s2, $sp, 76 # 4-byte Folded Reload -+; LA32-NEXT: ld.w $s1, $sp, 80 # 4-byte Folded Reload -+; LA32-NEXT: ld.w $s0, $sp, 84 # 4-byte Folded Reload -+; LA32-NEXT: ld.w $fp, $sp, 88 # 4-byte Folded Reload -+; LA32-NEXT: ld.w $ra, $sp, 92 # 4-byte Folded Reload -+; LA32-NEXT: addi.w $sp, $sp, 96 - ; LA32-NEXT: ret - ; - ; LA64-LABEL: smuloi128: - ; LA64: # %bb.0: --; LA64-NEXT: addi.d $sp, $sp, -32 --; LA64-NEXT: .cfi_def_cfa_offset 32 --; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill --; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill --; LA64-NEXT: .cfi_offset 1, -8 --; LA64-NEXT: .cfi_offset 22, -16 --; LA64-NEXT: move $fp, $a4 --; LA64-NEXT: st.d $zero, $sp, 8 --; LA64-NEXT: addi.d $a4, $sp, 8 --; LA64-NEXT: bl %plt(__muloti4) --; LA64-NEXT: st.d $a1, $fp, 8 --; LA64-NEXT: st.d $a0, $fp, 0 --; LA64-NEXT: ld.d $a0, $sp, 8 -+; LA64-NEXT: srai.d $a5, $a1, 63 -+; LA64-NEXT: mul.d $a6, $a2, $a5 -+; LA64-NEXT: mulh.du $a7, $a2, $a5 -+; LA64-NEXT: add.d $a7, $a7, $a6 -+; LA64-NEXT: mul.d $a5, $a3, $a5 -+; LA64-NEXT: add.d $a5, $a7, $a5 -+; LA64-NEXT: srai.d $a7, $a3, 63 -+; LA64-NEXT: mul.d $t0, $a7, $a1 -+; LA64-NEXT: mulh.du $t1, $a7, $a0 -+; LA64-NEXT: add.d $t0, $t1, $t0 -+; LA64-NEXT: mul.d $a7, $a7, $a0 -+; LA64-NEXT: add.d $t0, $t0, $a7 -+; LA64-NEXT: add.d $a5, $t0, $a5 -+; LA64-NEXT: mulh.du $t0, $a0, $a2 -+; LA64-NEXT: mul.d $t1, $a1, $a2 -+; LA64-NEXT: add.d $t0, $t1, $t0 -+; LA64-NEXT: sltu $t1, $t0, $t1 -+; LA64-NEXT: mulh.du $t2, $a1, $a2 -+; LA64-NEXT: add.d $t1, $t2, $t1 -+; LA64-NEXT: mul.d $t2, $a0, $a3 -+; LA64-NEXT: add.d $t0, $t2, $t0 -+; LA64-NEXT: sltu $t2, $t0, $t2 -+; LA64-NEXT: mulh.du $t3, $a0, $a3 -+; LA64-NEXT: add.d $t2, $t3, $t2 -+; LA64-NEXT: add.d $a6, $a7, $a6 -+; LA64-NEXT: sltu $a7, $a6, $a7 -+; LA64-NEXT: add.d $a5, $a5, $a7 -+; LA64-NEXT: mul.d $a0, $a0, $a2 -+; LA64-NEXT: mul.d $a2, $a1, $a3 -+; LA64-NEXT: mulh.du $a1, $a1, $a3 -+; LA64-NEXT: add.d $a3, $t1, $t2 -+; LA64-NEXT: sltu $a7, $a3, $t1 -+; LA64-NEXT: add.d $a1, $a1, $a7 -+; LA64-NEXT: st.d $a0, $a4, 0 -+; LA64-NEXT: add.d $a0, $a2, $a3 -+; LA64-NEXT: sltu $a2, $a0, $a2 -+; LA64-NEXT: add.d $a1, $a1, $a2 -+; LA64-NEXT: st.d $t0, $a4, 8 -+; LA64-NEXT: 
add.d $a1, $a1, $a5 -+; LA64-NEXT: add.d $a2, $a0, $a6 -+; LA64-NEXT: sltu $a0, $a2, $a0 -+; LA64-NEXT: add.d $a0, $a1, $a0 -+; LA64-NEXT: srai.d $a1, $t0, 63 -+; LA64-NEXT: xor $a0, $a0, $a1 -+; LA64-NEXT: xor $a1, $a2, $a1 -+; LA64-NEXT: or $a0, $a1, $a0 - ; LA64-NEXT: sltu $a0, $zero, $a0 --; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload --; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload --; LA64-NEXT: addi.d $sp, $sp, 32 - ; LA64-NEXT: ret - %t = call {i128, i1} @llvm.smul.with.overflow.i128(i128 %v1, i128 %v2) - %val = extractvalue {i128, i1} %t, 0 --- -2.20.1 - - -From 01ced6193e2abfbd50fbd9d40066cf27f9f9067b Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Wed, 29 Nov 2023 15:21:21 +0800 -Subject: [PATCH 12/14] [LoongArch] Fix pattern for FNMSUB_{S/D} instructions - (#73742) - -``` -when a=c=-0.0, b=0.0: --(a * b + (-c)) = -0.0 --a * b + c = 0.0 -(fneg (fma a, b (-c))) != (fma (fneg a), b ,c) -``` - -See https://reviews.llvm.org/D90901 for a similar discussion on X86. - -(cherry picked from commit 5e7e0d603204ede803323a825318e365a87f73e9) ---- - .../LoongArch/LoongArchFloat32InstrInfo.td | 8 +- - .../LoongArch/LoongArchFloat64InstrInfo.td | 6 +- - llvm/test/CodeGen/LoongArch/double-fma.ll | 259 ++++++++++++++++-- - llvm/test/CodeGen/LoongArch/float-fma.ll | 259 ++++++++++++++++-- - 4 files changed, 483 insertions(+), 49 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td -index 826db54febd3..65120c083f49 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td -@@ -294,8 +294,12 @@ def : Pat<(fneg (fma FPR32:$fj, FPR32:$fk, FPR32:$fa)), - def : Pat<(fma_nsz (fneg FPR32:$fj), FPR32:$fk, (fneg FPR32:$fa)), - (FNMADD_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>; - --// fnmsub.s: -fj * fk + fa --def : Pat<(fma (fneg FPR32:$fj), FPR32:$fk, FPR32:$fa), -+// fnmsub.s: -(fj * fk - fa) -+def : Pat<(fneg (fma FPR32:$fj, FPR32:$fk, (fneg FPR32:$fa))), -+ (FNMSUB_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>; -+ -+// fnmsub.s: -fj * fk + fa (the nsz flag on the FMA) -+def : Pat<(fma_nsz (fneg FPR32:$fj), FPR32:$fk, FPR32:$fa), - (FNMSUB_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>; - } // Predicates = [HasBasicF] - -diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td -index 5118474725b6..437c1e4d7be2 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td -@@ -256,7 +256,11 @@ def : Pat<(fma_nsz (fneg FPR64:$fj), FPR64:$fk, (fneg FPR64:$fa)), - (FNMADD_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>; - - // fnmsub.d: -(fj * fk - fa) --def : Pat<(fma (fneg FPR64:$fj), FPR64:$fk, FPR64:$fa), -+def : Pat<(fneg (fma FPR64:$fj, FPR64:$fk, (fneg FPR64:$fa))), -+ (FNMSUB_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>; -+ -+// fnmsub.d: -fj * fk + fa (the nsz flag on the FMA) -+def : Pat<(fma_nsz (fneg FPR64:$fj), FPR64:$fk, FPR64:$fa), - (FNMSUB_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>; - } // Predicates = [HasBasicD] - -diff --git a/llvm/test/CodeGen/LoongArch/double-fma.ll b/llvm/test/CodeGen/LoongArch/double-fma.ll -index 6dd628479433..58d20c62a668 100644 ---- a/llvm/test/CodeGen/LoongArch/double-fma.ll -+++ b/llvm/test/CodeGen/LoongArch/double-fma.ll -@@ -236,13 +236,15 @@ define double @fnmsub_d(double %a, double %b, double %c) nounwind { - ; LA32-CONTRACT-ON-LABEL: fnmsub_d: - ; LA32-CONTRACT-ON: # %bb.0: - ; LA32-CONTRACT-ON-NEXT: 
fmul.d $fa0, $fa0, $fa1 --; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 -+; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa0, $fa2 -+; LA32-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 - ; LA32-CONTRACT-ON-NEXT: ret - ; - ; LA32-CONTRACT-OFF-LABEL: fnmsub_d: - ; LA32-CONTRACT-OFF: # %bb.0: - ; LA32-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 --; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 -+; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa0, $fa2 -+; LA32-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 - ; LA32-CONTRACT-OFF-NEXT: ret - ; - ; LA64-CONTRACT-FAST-LABEL: fnmsub_d: -@@ -253,12 +255,98 @@ define double @fnmsub_d(double %a, double %b, double %c) nounwind { - ; LA64-CONTRACT-ON-LABEL: fnmsub_d: - ; LA64-CONTRACT-ON: # %bb.0: - ; LA64-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 --; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 -+; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa0, $fa2 -+; LA64-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 - ; LA64-CONTRACT-ON-NEXT: ret - ; - ; LA64-CONTRACT-OFF-LABEL: fnmsub_d: - ; LA64-CONTRACT-OFF: # %bb.0: - ; LA64-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 -+; LA64-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa0, $fa2 -+; LA64-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 -+; LA64-CONTRACT-OFF-NEXT: ret -+ %negc = fneg double %c -+ %mul = fmul double %a, %b -+ %add = fadd double %mul, %negc -+ %neg = fneg double %add -+ ret double %neg -+} -+ -+define double @fnmsub_d_nsz(double %a, double %b, double %c) nounwind { -+; LA32-CONTRACT-FAST-LABEL: fnmsub_d_nsz: -+; LA32-CONTRACT-FAST: # %bb.0: -+; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-FAST-NEXT: ret -+; -+; LA32-CONTRACT-ON-LABEL: fnmsub_d_nsz: -+; LA32-CONTRACT-ON: # %bb.0: -+; LA32-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 -+; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 -+; LA32-CONTRACT-ON-NEXT: ret -+; -+; LA32-CONTRACT-OFF-LABEL: fnmsub_d_nsz: -+; LA32-CONTRACT-OFF: # %bb.0: -+; LA32-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 -+; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 -+; LA32-CONTRACT-OFF-NEXT: ret -+; -+; LA64-CONTRACT-FAST-LABEL: fnmsub_d_nsz: -+; LA64-CONTRACT-FAST: # %bb.0: -+; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-FAST-NEXT: ret -+; -+; LA64-CONTRACT-ON-LABEL: fnmsub_d_nsz: -+; LA64-CONTRACT-ON: # %bb.0: -+; LA64-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 -+; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 -+; LA64-CONTRACT-ON-NEXT: ret -+; -+; LA64-CONTRACT-OFF-LABEL: fnmsub_d_nsz: -+; LA64-CONTRACT-OFF: # %bb.0: -+; LA64-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 -+; LA64-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 -+; LA64-CONTRACT-OFF-NEXT: ret -+ %nega = fneg nsz double %a -+ %mul = fmul nsz double %nega, %b -+ %add = fadd nsz double %mul, %c -+ ret double %add -+} -+ -+;; Check that fnmsub.d is not emitted. 
-+define double @not_fnmsub_d(double %a, double %b, double %c) nounwind { -+; LA32-CONTRACT-FAST-LABEL: not_fnmsub_d: -+; LA32-CONTRACT-FAST: # %bb.0: -+; LA32-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 -+; LA32-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-FAST-NEXT: ret -+; -+; LA32-CONTRACT-ON-LABEL: not_fnmsub_d: -+; LA32-CONTRACT-ON: # %bb.0: -+; LA32-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 -+; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 -+; LA32-CONTRACT-ON-NEXT: ret -+; -+; LA32-CONTRACT-OFF-LABEL: not_fnmsub_d: -+; LA32-CONTRACT-OFF: # %bb.0: -+; LA32-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 -+; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 -+; LA32-CONTRACT-OFF-NEXT: ret -+; -+; LA64-CONTRACT-FAST-LABEL: not_fnmsub_d: -+; LA64-CONTRACT-FAST: # %bb.0: -+; LA64-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 -+; LA64-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-FAST-NEXT: ret -+; -+; LA64-CONTRACT-ON-LABEL: not_fnmsub_d: -+; LA64-CONTRACT-ON: # %bb.0: -+; LA64-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 -+; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 -+; LA64-CONTRACT-ON-NEXT: ret -+; -+; LA64-CONTRACT-OFF-LABEL: not_fnmsub_d: -+; LA64-CONTRACT-OFF: # %bb.0: -+; LA64-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 - ; LA64-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 - ; LA64-CONTRACT-OFF-NEXT: ret - %nega = fneg double %a -@@ -483,6 +571,86 @@ define double @contract_fnmsub_d(double %a, double %b, double %c) nounwind { - ; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_d: - ; LA64-CONTRACT-OFF: # %bb.0: - ; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-OFF-NEXT: ret -+ %negc = fneg contract double %c -+ %mul = fmul contract double %a, %b -+ %add = fadd contract double %mul, %negc -+ %neg = fneg contract double %add -+ ret double %neg -+} -+ -+define double @contract_fnmsub_d_nsz(double %a, double %b, double %c) nounwind { -+; LA32-CONTRACT-FAST-LABEL: contract_fnmsub_d_nsz: -+; LA32-CONTRACT-FAST: # %bb.0: -+; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-FAST-NEXT: ret -+; -+; LA32-CONTRACT-ON-LABEL: contract_fnmsub_d_nsz: -+; LA32-CONTRACT-ON: # %bb.0: -+; LA32-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-ON-NEXT: ret -+; -+; LA32-CONTRACT-OFF-LABEL: contract_fnmsub_d_nsz: -+; LA32-CONTRACT-OFF: # %bb.0: -+; LA32-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-OFF-NEXT: ret -+; -+; LA64-CONTRACT-FAST-LABEL: contract_fnmsub_d_nsz: -+; LA64-CONTRACT-FAST: # %bb.0: -+; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-FAST-NEXT: ret -+; -+; LA64-CONTRACT-ON-LABEL: contract_fnmsub_d_nsz: -+; LA64-CONTRACT-ON: # %bb.0: -+; LA64-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-ON-NEXT: ret -+; -+; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_d_nsz: -+; LA64-CONTRACT-OFF: # %bb.0: -+; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-OFF-NEXT: ret -+ %nega = fneg contract nsz double %a -+ %mul = fmul contract nsz double %nega, %b -+ %add = fadd contract nsz double %mul, %c -+ ret double %add -+} -+ -+;; Check that fnmsub.d is not emitted. 
-+define double @not_contract_fnmsub_d(double %a, double %b, double %c) nounwind { -+; LA32-CONTRACT-FAST-LABEL: not_contract_fnmsub_d: -+; LA32-CONTRACT-FAST: # %bb.0: -+; LA32-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 -+; LA32-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-FAST-NEXT: ret -+; -+; LA32-CONTRACT-ON-LABEL: not_contract_fnmsub_d: -+; LA32-CONTRACT-ON: # %bb.0: -+; LA32-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 -+; LA32-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-ON-NEXT: ret -+; -+; LA32-CONTRACT-OFF-LABEL: not_contract_fnmsub_d: -+; LA32-CONTRACT-OFF: # %bb.0: -+; LA32-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 -+; LA32-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-OFF-NEXT: ret -+; -+; LA64-CONTRACT-FAST-LABEL: not_contract_fnmsub_d: -+; LA64-CONTRACT-FAST: # %bb.0: -+; LA64-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 -+; LA64-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-FAST-NEXT: ret -+; -+; LA64-CONTRACT-ON-LABEL: not_contract_fnmsub_d: -+; LA64-CONTRACT-ON: # %bb.0: -+; LA64-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 -+; LA64-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-ON-NEXT: ret -+; -+; LA64-CONTRACT-OFF-LABEL: not_contract_fnmsub_d: -+; LA64-CONTRACT-OFF: # %bb.0: -+; LA64-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 -+; LA64-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 - ; LA64-CONTRACT-OFF-NEXT: ret - %nega = fneg contract double %a - %mul = fmul contract double %nega, %b -@@ -592,8 +760,8 @@ define double @fnmadd_d_intrinsics(double %a, double %b, double %c) nounwind { - ; LA64-CONTRACT-OFF-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 - ; LA64-CONTRACT-OFF-NEXT: ret - %fma = call double @llvm.fma.f64(double %a, double %b, double %c) -- %neg = fneg double %fma -- ret double %neg -+ %negfma = fneg double %fma -+ ret double %negfma - } - - define double @fnmadd_d_nsz_intrinsics(double %a, double %b, double %c) nounwind { -@@ -704,44 +872,87 @@ define double @fnmsub_d_intrinsics(double %a, double %b, double %c) nounwind { - ; LA64-CONTRACT-OFF-LABEL: fnmsub_d_intrinsics: - ; LA64-CONTRACT-OFF: # %bb.0: - ; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-OFF-NEXT: ret -+ %negc = fneg double %c -+ %fma = call double @llvm.fma.f64(double %a, double %b, double %negc) -+ %negfma = fneg double %fma -+ ret double %negfma -+} -+ -+define double @fnmsub_d_nsz_intrinsics(double %a, double %b, double %c) nounwind { -+; LA32-CONTRACT-FAST-LABEL: fnmsub_d_nsz_intrinsics: -+; LA32-CONTRACT-FAST: # %bb.0: -+; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-FAST-NEXT: ret -+; -+; LA32-CONTRACT-ON-LABEL: fnmsub_d_nsz_intrinsics: -+; LA32-CONTRACT-ON: # %bb.0: -+; LA32-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-ON-NEXT: ret -+; -+; LA32-CONTRACT-OFF-LABEL: fnmsub_d_nsz_intrinsics: -+; LA32-CONTRACT-OFF: # %bb.0: -+; LA32-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-OFF-NEXT: ret -+; -+; LA64-CONTRACT-FAST-LABEL: fnmsub_d_nsz_intrinsics: -+; LA64-CONTRACT-FAST: # %bb.0: -+; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-FAST-NEXT: ret -+; -+; LA64-CONTRACT-ON-LABEL: fnmsub_d_nsz_intrinsics: -+; LA64-CONTRACT-ON: # %bb.0: -+; LA64-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-ON-NEXT: ret -+; -+; LA64-CONTRACT-OFF-LABEL: fnmsub_d_nsz_intrinsics: -+; LA64-CONTRACT-OFF: # %bb.0: -+; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 - ; LA64-CONTRACT-OFF-NEXT: 
ret - %nega = fneg double %a -- %fma = call double @llvm.fma.f64(double %nega, double %b, double %c) -+ %fma = call nsz double @llvm.fma.f64(double %nega, double %b, double %c) - ret double %fma - } - --define double @fnmsub_d_swap_intrinsics(double %a, double %b, double %c) nounwind { --; LA32-CONTRACT-FAST-LABEL: fnmsub_d_swap_intrinsics: -+;; Check that fnmsub.d is not emitted. -+define double @not_fnmsub_d_intrinsics(double %a, double %b, double %c) nounwind { -+; LA32-CONTRACT-FAST-LABEL: not_fnmsub_d_intrinsics: - ; LA32-CONTRACT-FAST: # %bb.0: --; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 -+; LA32-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 -+; LA32-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 - ; LA32-CONTRACT-FAST-NEXT: ret - ; --; LA32-CONTRACT-ON-LABEL: fnmsub_d_swap_intrinsics: -+; LA32-CONTRACT-ON-LABEL: not_fnmsub_d_intrinsics: - ; LA32-CONTRACT-ON: # %bb.0: --; LA32-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 -+; LA32-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 -+; LA32-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 - ; LA32-CONTRACT-ON-NEXT: ret - ; --; LA32-CONTRACT-OFF-LABEL: fnmsub_d_swap_intrinsics: -+; LA32-CONTRACT-OFF-LABEL: not_fnmsub_d_intrinsics: - ; LA32-CONTRACT-OFF: # %bb.0: --; LA32-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 -+; LA32-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 -+; LA32-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 - ; LA32-CONTRACT-OFF-NEXT: ret - ; --; LA64-CONTRACT-FAST-LABEL: fnmsub_d_swap_intrinsics: -+; LA64-CONTRACT-FAST-LABEL: not_fnmsub_d_intrinsics: - ; LA64-CONTRACT-FAST: # %bb.0: --; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 -+; LA64-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 -+; LA64-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 - ; LA64-CONTRACT-FAST-NEXT: ret - ; --; LA64-CONTRACT-ON-LABEL: fnmsub_d_swap_intrinsics: -+; LA64-CONTRACT-ON-LABEL: not_fnmsub_d_intrinsics: - ; LA64-CONTRACT-ON: # %bb.0: --; LA64-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 -+; LA64-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 -+; LA64-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 - ; LA64-CONTRACT-ON-NEXT: ret - ; --; LA64-CONTRACT-OFF-LABEL: fnmsub_d_swap_intrinsics: -+; LA64-CONTRACT-OFF-LABEL: not_fnmsub_d_intrinsics: - ; LA64-CONTRACT-OFF: # %bb.0: --; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 -+; LA64-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 -+; LA64-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 - ; LA64-CONTRACT-OFF-NEXT: ret -- %negb = fneg double %b -- %fma = call double @llvm.fma.f64(double %a, double %negb, double %c) -+ %nega = fneg double %a -+ %fma = call double @llvm.fma.f64(double %nega, double %b, double %c) - ret double %fma - } - -@@ -882,6 +1093,8 @@ define double @fnmsub_d_contract(double %a, double %b, double %c) nounwind { - ; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 - ; LA64-CONTRACT-OFF-NEXT: ret - %mul = fmul contract double %a, %b -- %sub = fsub contract double %c, %mul -- ret double %sub -+ %negc = fneg contract double %c -+ %add = fadd contract double %negc, %mul -+ %negadd = fneg contract double %add -+ ret double %negadd - } -diff --git a/llvm/test/CodeGen/LoongArch/float-fma.ll b/llvm/test/CodeGen/LoongArch/float-fma.ll -index 54dc56784006..c236255d971a 100644 ---- a/llvm/test/CodeGen/LoongArch/float-fma.ll -+++ b/llvm/test/CodeGen/LoongArch/float-fma.ll -@@ -236,13 +236,15 @@ define float @fnmsub_s(float %a, float %b, float %c) nounwind { - ; LA32-CONTRACT-ON-LABEL: fnmsub_s: - ; LA32-CONTRACT-ON: # %bb.0: - ; LA32-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 
--; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 -+; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa0, $fa2 -+; LA32-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 - ; LA32-CONTRACT-ON-NEXT: ret - ; - ; LA32-CONTRACT-OFF-LABEL: fnmsub_s: - ; LA32-CONTRACT-OFF: # %bb.0: - ; LA32-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 --; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 -+; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa0, $fa2 -+; LA32-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 - ; LA32-CONTRACT-OFF-NEXT: ret - ; - ; LA64-CONTRACT-FAST-LABEL: fnmsub_s: -@@ -253,12 +255,98 @@ define float @fnmsub_s(float %a, float %b, float %c) nounwind { - ; LA64-CONTRACT-ON-LABEL: fnmsub_s: - ; LA64-CONTRACT-ON: # %bb.0: - ; LA64-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 --; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 -+; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa0, $fa2 -+; LA64-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 - ; LA64-CONTRACT-ON-NEXT: ret - ; - ; LA64-CONTRACT-OFF-LABEL: fnmsub_s: - ; LA64-CONTRACT-OFF: # %bb.0: - ; LA64-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 -+; LA64-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa0, $fa2 -+; LA64-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 -+; LA64-CONTRACT-OFF-NEXT: ret -+ %negc = fneg float %c -+ %mul = fmul float %a, %b -+ %add = fadd float %mul, %negc -+ %neg = fneg float %add -+ ret float %neg -+} -+ -+define float @fnmsub_s_nsz(float %a, float %b, float %c) nounwind { -+; LA32-CONTRACT-FAST-LABEL: fnmsub_s_nsz: -+; LA32-CONTRACT-FAST: # %bb.0: -+; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-FAST-NEXT: ret -+; -+; LA32-CONTRACT-ON-LABEL: fnmsub_s_nsz: -+; LA32-CONTRACT-ON: # %bb.0: -+; LA32-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 -+; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 -+; LA32-CONTRACT-ON-NEXT: ret -+; -+; LA32-CONTRACT-OFF-LABEL: fnmsub_s_nsz: -+; LA32-CONTRACT-OFF: # %bb.0: -+; LA32-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 -+; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 -+; LA32-CONTRACT-OFF-NEXT: ret -+; -+; LA64-CONTRACT-FAST-LABEL: fnmsub_s_nsz: -+; LA64-CONTRACT-FAST: # %bb.0: -+; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-FAST-NEXT: ret -+; -+; LA64-CONTRACT-ON-LABEL: fnmsub_s_nsz: -+; LA64-CONTRACT-ON: # %bb.0: -+; LA64-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 -+; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 -+; LA64-CONTRACT-ON-NEXT: ret -+; -+; LA64-CONTRACT-OFF-LABEL: fnmsub_s_nsz: -+; LA64-CONTRACT-OFF: # %bb.0: -+; LA64-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 -+; LA64-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 -+; LA64-CONTRACT-OFF-NEXT: ret -+ %nega = fneg nsz float %a -+ %mul = fmul nsz float %nega, %b -+ %add = fadd nsz float %mul, %c -+ ret float %add -+} -+ -+;; Check that fnmsub.s is not emitted. 
-+define float @not_fnmsub_s(float %a, float %b, float %c) nounwind { -+; LA32-CONTRACT-FAST-LABEL: not_fnmsub_s: -+; LA32-CONTRACT-FAST: # %bb.0: -+; LA32-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 -+; LA32-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-FAST-NEXT: ret -+; -+; LA32-CONTRACT-ON-LABEL: not_fnmsub_s: -+; LA32-CONTRACT-ON: # %bb.0: -+; LA32-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 -+; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 -+; LA32-CONTRACT-ON-NEXT: ret -+; -+; LA32-CONTRACT-OFF-LABEL: not_fnmsub_s: -+; LA32-CONTRACT-OFF: # %bb.0: -+; LA32-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 -+; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 -+; LA32-CONTRACT-OFF-NEXT: ret -+; -+; LA64-CONTRACT-FAST-LABEL: not_fnmsub_s: -+; LA64-CONTRACT-FAST: # %bb.0: -+; LA64-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 -+; LA64-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-FAST-NEXT: ret -+; -+; LA64-CONTRACT-ON-LABEL: not_fnmsub_s: -+; LA64-CONTRACT-ON: # %bb.0: -+; LA64-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 -+; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 -+; LA64-CONTRACT-ON-NEXT: ret -+; -+; LA64-CONTRACT-OFF-LABEL: not_fnmsub_s: -+; LA64-CONTRACT-OFF: # %bb.0: -+; LA64-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 - ; LA64-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 - ; LA64-CONTRACT-OFF-NEXT: ret - %nega = fneg float %a -@@ -483,6 +571,86 @@ define float @contract_fnmsub_s(float %a, float %b, float %c) nounwind { - ; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_s: - ; LA64-CONTRACT-OFF: # %bb.0: - ; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-OFF-NEXT: ret -+ %negc = fneg contract float %c -+ %mul = fmul contract float %a, %b -+ %add = fadd contract float %mul, %negc -+ %neg = fneg contract float %add -+ ret float %neg -+} -+ -+define float @contract_fnmsub_s_nsz(float %a, float %b, float %c) nounwind { -+; LA32-CONTRACT-FAST-LABEL: contract_fnmsub_s_nsz: -+; LA32-CONTRACT-FAST: # %bb.0: -+; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-FAST-NEXT: ret -+; -+; LA32-CONTRACT-ON-LABEL: contract_fnmsub_s_nsz: -+; LA32-CONTRACT-ON: # %bb.0: -+; LA32-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-ON-NEXT: ret -+; -+; LA32-CONTRACT-OFF-LABEL: contract_fnmsub_s_nsz: -+; LA32-CONTRACT-OFF: # %bb.0: -+; LA32-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-OFF-NEXT: ret -+; -+; LA64-CONTRACT-FAST-LABEL: contract_fnmsub_s_nsz: -+; LA64-CONTRACT-FAST: # %bb.0: -+; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-FAST-NEXT: ret -+; -+; LA64-CONTRACT-ON-LABEL: contract_fnmsub_s_nsz: -+; LA64-CONTRACT-ON: # %bb.0: -+; LA64-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-ON-NEXT: ret -+; -+; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_s_nsz: -+; LA64-CONTRACT-OFF: # %bb.0: -+; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-OFF-NEXT: ret -+ %nega = fneg contract nsz float %a -+ %mul = fmul contract nsz float %nega, %b -+ %add = fadd contract nsz float %mul, %c -+ ret float %add -+} -+ -+;; Check that fnmsub.s is not emitted. 
-+define float @not_contract_fnmsub_s(float %a, float %b, float %c) nounwind { -+; LA32-CONTRACT-FAST-LABEL: not_contract_fnmsub_s: -+; LA32-CONTRACT-FAST: # %bb.0: -+; LA32-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 -+; LA32-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-FAST-NEXT: ret -+; -+; LA32-CONTRACT-ON-LABEL: not_contract_fnmsub_s: -+; LA32-CONTRACT-ON: # %bb.0: -+; LA32-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 -+; LA32-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-ON-NEXT: ret -+; -+; LA32-CONTRACT-OFF-LABEL: not_contract_fnmsub_s: -+; LA32-CONTRACT-OFF: # %bb.0: -+; LA32-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 -+; LA32-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-OFF-NEXT: ret -+; -+; LA64-CONTRACT-FAST-LABEL: not_contract_fnmsub_s: -+; LA64-CONTRACT-FAST: # %bb.0: -+; LA64-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 -+; LA64-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-FAST-NEXT: ret -+; -+; LA64-CONTRACT-ON-LABEL: not_contract_fnmsub_s: -+; LA64-CONTRACT-ON: # %bb.0: -+; LA64-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 -+; LA64-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-ON-NEXT: ret -+; -+; LA64-CONTRACT-OFF-LABEL: not_contract_fnmsub_s: -+; LA64-CONTRACT-OFF: # %bb.0: -+; LA64-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 -+; LA64-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 - ; LA64-CONTRACT-OFF-NEXT: ret - %nega = fneg contract float %a - %mul = fmul contract float %nega, %b -@@ -592,8 +760,8 @@ define float @fnmadd_s_intrinsics(float %a, float %b, float %c) nounwind { - ; LA64-CONTRACT-OFF-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 - ; LA64-CONTRACT-OFF-NEXT: ret - %fma = call float @llvm.fma.f64(float %a, float %b, float %c) -- %neg = fneg float %fma -- ret float %neg -+ %negfma = fneg float %fma -+ ret float %negfma - } - - define float @fnmadd_s_nsz_intrinsics(float %a, float %b, float %c) nounwind { -@@ -704,44 +872,87 @@ define float @fnmsub_s_intrinsics(float %a, float %b, float %c) nounwind { - ; LA64-CONTRACT-OFF-LABEL: fnmsub_s_intrinsics: - ; LA64-CONTRACT-OFF: # %bb.0: - ; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-OFF-NEXT: ret -+ %negc = fneg float %c -+ %fma = call float @llvm.fma.f64(float %a, float %b, float %negc) -+ %negfma = fneg float %fma -+ ret float %negfma -+} -+ -+define float @fnmsub_s_nsz_intrinsics(float %a, float %b, float %c) nounwind { -+; LA32-CONTRACT-FAST-LABEL: fnmsub_s_nsz_intrinsics: -+; LA32-CONTRACT-FAST: # %bb.0: -+; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-FAST-NEXT: ret -+; -+; LA32-CONTRACT-ON-LABEL: fnmsub_s_nsz_intrinsics: -+; LA32-CONTRACT-ON: # %bb.0: -+; LA32-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-ON-NEXT: ret -+; -+; LA32-CONTRACT-OFF-LABEL: fnmsub_s_nsz_intrinsics: -+; LA32-CONTRACT-OFF: # %bb.0: -+; LA32-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-OFF-NEXT: ret -+; -+; LA64-CONTRACT-FAST-LABEL: fnmsub_s_nsz_intrinsics: -+; LA64-CONTRACT-FAST: # %bb.0: -+; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-FAST-NEXT: ret -+; -+; LA64-CONTRACT-ON-LABEL: fnmsub_s_nsz_intrinsics: -+; LA64-CONTRACT-ON: # %bb.0: -+; LA64-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-ON-NEXT: ret -+; -+; LA64-CONTRACT-OFF-LABEL: fnmsub_s_nsz_intrinsics: -+; LA64-CONTRACT-OFF: # %bb.0: -+; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 - ; LA64-CONTRACT-OFF-NEXT: ret - %nega = fneg float %a -- %fma = 
call float @llvm.fma.f64(float %nega, float %b, float %c) -+ %fma = call nsz float @llvm.fma.f64(float %nega, float %b, float %c) - ret float %fma - } - --define float @fnmsub_s_swap_intrinsics(float %a, float %b, float %c) nounwind { --; LA32-CONTRACT-FAST-LABEL: fnmsub_s_swap_intrinsics: -+;; Check that fnmsub.s is not emitted. -+define float @not_fnmsub_s_intrinsics(float %a, float %b, float %c) nounwind { -+; LA32-CONTRACT-FAST-LABEL: not_fnmsub_s_intrinsics: - ; LA32-CONTRACT-FAST: # %bb.0: --; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 -+; LA32-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 -+; LA32-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 - ; LA32-CONTRACT-FAST-NEXT: ret - ; --; LA32-CONTRACT-ON-LABEL: fnmsub_s_swap_intrinsics: -+; LA32-CONTRACT-ON-LABEL: not_fnmsub_s_intrinsics: - ; LA32-CONTRACT-ON: # %bb.0: --; LA32-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 -+; LA32-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 -+; LA32-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 - ; LA32-CONTRACT-ON-NEXT: ret - ; --; LA32-CONTRACT-OFF-LABEL: fnmsub_s_swap_intrinsics: -+; LA32-CONTRACT-OFF-LABEL: not_fnmsub_s_intrinsics: - ; LA32-CONTRACT-OFF: # %bb.0: --; LA32-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 -+; LA32-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 -+; LA32-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 - ; LA32-CONTRACT-OFF-NEXT: ret - ; --; LA64-CONTRACT-FAST-LABEL: fnmsub_s_swap_intrinsics: -+; LA64-CONTRACT-FAST-LABEL: not_fnmsub_s_intrinsics: - ; LA64-CONTRACT-FAST: # %bb.0: --; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 -+; LA64-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 -+; LA64-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 - ; LA64-CONTRACT-FAST-NEXT: ret - ; --; LA64-CONTRACT-ON-LABEL: fnmsub_s_swap_intrinsics: -+; LA64-CONTRACT-ON-LABEL: not_fnmsub_s_intrinsics: - ; LA64-CONTRACT-ON: # %bb.0: --; LA64-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 -+; LA64-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 -+; LA64-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 - ; LA64-CONTRACT-ON-NEXT: ret - ; --; LA64-CONTRACT-OFF-LABEL: fnmsub_s_swap_intrinsics: -+; LA64-CONTRACT-OFF-LABEL: not_fnmsub_s_intrinsics: - ; LA64-CONTRACT-OFF: # %bb.0: --; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 -+; LA64-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 -+; LA64-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 - ; LA64-CONTRACT-OFF-NEXT: ret -- %negb = fneg float %b -- %fma = call float @llvm.fma.f64(float %a, float %negb, float %c) -+ %nega = fneg float %a -+ %fma = call float @llvm.fma.f64(float %nega, float %b, float %c) - ret float %fma - } - -@@ -882,6 +1093,8 @@ define float @fnmsub_s_contract(float %a, float %b, float %c) nounwind { - ; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 - ; LA64-CONTRACT-OFF-NEXT: ret - %mul = fmul contract float %a, %b -- %sub = fsub contract float %c, %mul -- ret float %sub -+ %negc = fneg contract float %c -+ %add = fadd contract float %negc, %mul -+ %negadd = fneg contract float %add -+ ret float %negadd - } --- -2.20.1 - - -From 7a3bd125d9c1d0265b265ce238a88d0d4550e5a0 Mon Sep 17 00:00:00 2001 -From: Weining Lu -Date: Wed, 3 Jan 2024 13:59:12 +0800 -Subject: [PATCH 13/14] [LoongArch] Fix the procossor series mask - -Refer PRID_SERIES_MASK definition in linux kernel: -arch/loongarch/include/asm/cpu.h. 
- -(cherry picked from commit 7e186d366d6c7def0543acc255931f617e76dff0) ---- - llvm/lib/TargetParser/Host.cpp | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp -index 81309280a44b..d11dc605e188 100644 ---- a/llvm/lib/TargetParser/Host.cpp -+++ b/llvm/lib/TargetParser/Host.cpp -@@ -1462,7 +1462,8 @@ StringRef sys::getHostCPUName() { - // Use processor id to detect cpu name. - uint32_t processor_id; - __asm__("cpucfg %[prid], $zero\n\t" : [prid] "=r"(processor_id)); -- switch (processor_id & 0xff00) { -+ // Refer PRID_SERIES_MASK in linux kernel: arch/loongarch/include/asm/cpu.h. -+ switch (processor_id & 0xf000) { - case 0xc000: // Loongson 64bit, 4-issue - return "la464"; - // TODO: Others. --- -2.20.1 - - -From 3634ac4cbc475509c46521f5b8a3fcbeca6d06c7 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Mon, 11 Mar 2024 08:59:17 +0800 -Subject: [PATCH 14/14] [LoongArch] Make sure that the LoongArchISD::BSTRINS - node uses the correct `MSB` value (#84454) - -The `MSB` must not be greater than `GRLen`. Without this patch, newly -added test cases will crash with LoongArch32, resulting in a 'cannot -select' error. - -(cherry picked from commit edd4c6c6dca4c556de22b2ab73d5bfc02d28e59b) -(cherry picked from commit d77c5c3830d925b3795e2f1535a6568399fe6626) ---- - llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp | 4 +++- - llvm/test/CodeGen/LoongArch/bstrins_w.ll | 13 +++++++++++++ - 2 files changed, 16 insertions(+), 1 deletion(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index ed106cb766bc..5affaf37ad5a 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -2310,7 +2310,9 @@ Retry: - return DAG.getNode( - LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), - DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy), -- DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT), -+ DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1) -+ : (MaskIdx0 + MaskLen0 - 1), -+ DL, GRLenVT), - DAG.getConstant(MaskIdx0, DL, GRLenVT)); - } - -diff --git a/llvm/test/CodeGen/LoongArch/bstrins_w.ll b/llvm/test/CodeGen/LoongArch/bstrins_w.ll -index dfbe000841cd..e008caacad2a 100644 ---- a/llvm/test/CodeGen/LoongArch/bstrins_w.ll -+++ b/llvm/test/CodeGen/LoongArch/bstrins_w.ll -@@ -145,6 +145,19 @@ define i32 @pat5(i32 %a) nounwind { - ret i32 %or - } - -+;; The high bits of `const` are zero. -+define i32 @pat5_high_zeros(i32 %a) nounwind { -+; CHECK-LABEL: pat5_high_zeros: -+; CHECK: # %bb.0: -+; CHECK-NEXT: lu12i.w $a1, 1 -+; CHECK-NEXT: ori $a1, $a1, 564 -+; CHECK-NEXT: bstrins.w $a0, $a1, 31, 16 -+; CHECK-NEXT: ret -+ %and = and i32 %a, 65535 ; 0x0000ffff -+ %or = or i32 %and, 305397760 ; 0x12340000 -+ ret i32 %or -+} -+ - ;; Pattern 6: a = b | ((c & mask) << shamt) - ;; In this testcase b is 0x10000002, but in fact we do not require b being a - ;; constant. 
As long as all positions in b to be overwritten by the incoming --- -2.20.1 - diff --git a/0012-LoongArch-Pre-commit-test-for-issue-70890.patch b/0012-LoongArch-Pre-commit-test-for-issue-70890.patch new file mode 100644 index 0000000..3466fc6 --- /dev/null +++ b/0012-LoongArch-Pre-commit-test-for-issue-70890.patch @@ -0,0 +1,46 @@ +From 8a6049b9d4baf0df5abe7f156bf6ec4b3fb7a256 Mon Sep 17 00:00:00 2001 +From: Weining Lu +Date: Thu, 2 Nov 2023 09:29:43 +0800 +Subject: [PATCH 12/27] [LoongArch] Pre-commit test for issue #70890 + +(cherry picked from commit 749083b91f31f370cf64831d3e7e6215b6d51442) +--- + .../LoongArch/abi-lp64d-empty-unions.c | 26 +++++++++++++++++++ + 1 file changed, 26 insertions(+) + create mode 100644 clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c + +diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c +new file mode 100644 +index 000000000000..b0607425336e +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c +@@ -0,0 +1,26 @@ ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +d -target-abi lp64d -emit-llvm %s -o - | \ ++// RUN: FileCheck --check-prefix=CHECK-C %s ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +d -target-abi lp64d -emit-llvm %s -o - -x c++ | \ ++// RUN: FileCheck --check-prefix=CHECK-CXX %s ++ ++#include ++ ++// CHECK-C: define{{.*}} void @test1() ++// CHECK-CXX: define{{.*}} i64 @_Z5test12u1(i64{{[^,]*}}) ++union u1 { }; ++union u1 test1(union u1 a) { ++ return a; ++} ++ ++struct s1 { ++ union u1 u; ++ int i; ++ float f; ++}; ++ ++// CHECK-C: define{{.*}} { i32, float } @test2(i32{{[^,]*}}, float{{[^,]*}}) ++/// FIXME: This doesn't match g++. ++// CHECK-CXX: define{{.*}} { i32, float } @_Z5test22s1(i32{{[^,]*}}, float{{[^,]*}}) ++struct s1 test2(struct s1 a) { ++ return a; ++} +-- +2.20.1 + diff --git a/0012-LoongArch-Set-some-operations-action-for-LSX-and-LAS.patch b/0012-LoongArch-Set-some-operations-action-for-LSX-and-LAS.patch new file mode 100644 index 0000000..bacede4 --- /dev/null +++ b/0012-LoongArch-Set-some-operations-action-for-LSX-and-LAS.patch @@ -0,0 +1,138 @@ +From c847fcdb11c17aa60940258c6d70c69c3f133c28 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Tue, 24 Oct 2023 15:46:56 +0800 +Subject: [PATCH 12/42] [LoongArch] Set some operations action for LSX and LASX + +First, expand all truncationg stores and extending loads. Second, +expand everything for `fixedlen_vector_valuetypes`. Finally, we +selectively turn on ones that can be effectively codegen'd. + +Simultaneously, this patch adds floating-point vector types to +load/store patterns. Additional test cases will be included in the IR +instruction test patchs. + +(cherry picked from commit f2441a06c609cedbb7e11303907f07bf0ca5cb2f) + +--- + .../LoongArch/LoongArchISelLowering.cpp | 74 +++++++++++++++++-- + .../LoongArch/LoongArchLASXInstrInfo.td | 2 +- + .../Target/LoongArch/LoongArchLSXInstrInfo.td | 2 +- + 3 files changed, 69 insertions(+), 9 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index 2f8ce57d3f5f..d3627cec2e8c 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -214,16 +214,76 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + + // Set operations for 'LSX' feature. 
+ +- if (Subtarget.hasExtLSX()) +- setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, +- {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8}, Legal); ++ if (Subtarget.hasExtLSX()) { ++ for (MVT VT : MVT::fixedlen_vector_valuetypes()) { ++ // Expand all truncating stores and extending loads. ++ for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { ++ setTruncStoreAction(VT, InnerVT, Expand); ++ setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); ++ setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); ++ setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); ++ } ++ // By default everything must be expanded. Then we will selectively turn ++ // on ones that can be effectively codegen'd. ++ for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) ++ setOperationAction(Op, VT, Expand); ++ } ++ ++ for (MVT VT : LSXVTs) { ++ setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal); ++ setOperationAction(ISD::BITCAST, VT, Legal); ++ setOperationAction(ISD::UNDEF, VT, Legal); ++ ++ // FIXME: For BUILD_VECTOR, it is temporarily set to `Legal` here, and it ++ // will be `Custom` handled in the future. ++ setOperationAction(ISD::BUILD_VECTOR, VT, Legal); ++ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); ++ } ++ for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { ++ setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); ++ setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT, ++ Legal); ++ setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM}, ++ VT, Legal); ++ setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal); ++ setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); ++ setOperationAction(ISD::CTPOP, VT, Legal); ++ } ++ for (MVT VT : {MVT::v4f32, MVT::v2f64}) { ++ setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); ++ setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); ++ setOperationAction(ISD::FMA, VT, Legal); ++ } ++ } + + // Set operations for 'LASX' feature. + +- if (Subtarget.hasExtLASX()) +- setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, +- {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}, +- Legal); ++ if (Subtarget.hasExtLASX()) { ++ for (MVT VT : LASXVTs) { ++ setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal); ++ setOperationAction(ISD::BITCAST, VT, Legal); ++ setOperationAction(ISD::UNDEF, VT, Legal); ++ ++ // FIXME: Same as above. ++ setOperationAction(ISD::BUILD_VECTOR, VT, Legal); ++ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); ++ } ++ for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) { ++ setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); ++ setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT, ++ Legal); ++ setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM}, ++ VT, Legal); ++ setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal); ++ setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); ++ setOperationAction(ISD::CTPOP, VT, Legal); ++ } ++ for (MVT VT : {MVT::v8f32, MVT::v4f64}) { ++ setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); ++ setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); ++ setOperationAction(ISD::FMA, VT, Legal); ++ } ++ } + + // Set DAG combine for LA32 and LA64. 
+ +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index 947950be2b8f..e19aa92266b1 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -1394,7 +1394,7 @@ def : Pat<(loongarch_vreplve v4i64:$xj, GRLenVT:$rk), + (XVREPLVE_D v4i64:$xj, GRLenVT:$rk)>; + + // Loads/Stores +-foreach vt = [v32i8, v16i16, v8i32, v4i64] in { ++foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in { + defm : LdPat; + def : RegRegLdPat; + defm : StPat; +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index e021adcecf4d..9391b1a8a20c 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -1494,7 +1494,7 @@ def : Pat<(loongarch_vreplve v2i64:$vj, GRLenVT:$rk), + (VREPLVE_D v2i64:$vj, GRLenVT:$rk)>; + + // Loads/Stores +-foreach vt = [v16i8, v8i16, v4i32, v2i64] in { ++foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { + defm : LdPat; + def : RegRegLdPat; + defm : StPat; +-- +2.20.1 + diff --git a/0012-lld-LoongArch-Add-a-another-corner-testcase-for-elf-.patch b/0012-lld-LoongArch-Add-a-another-corner-testcase-for-elf-.patch new file mode 100644 index 0000000..cbce30f --- /dev/null +++ b/0012-lld-LoongArch-Add-a-another-corner-testcase-for-elf-.patch @@ -0,0 +1,53 @@ +From fbb62a56d21fb7b609f5599f0d21416f3170a841 Mon Sep 17 00:00:00 2001 +From: Weining Lu +Date: Sat, 25 Nov 2023 15:44:05 +0800 +Subject: [PATCH 12/23] [lld][LoongArch] Add a another corner testcase for + elf::getLoongArchPageDelta + +Similar to e752b58e0d26. + +(cherry picked from commit 84a20989c6f72d0f7d04c9981d51c7838e95855c) +--- + lld/ELF/Arch/LoongArch.cpp | 1 - + lld/test/ELF/loongarch-pc-aligned.s | 13 +++++++++++++ + 2 files changed, 13 insertions(+), 1 deletion(-) + +diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp +index 72d9c6838e31..516d02bb9e3f 100644 +--- a/lld/ELF/Arch/LoongArch.cpp ++++ b/lld/ELF/Arch/LoongArch.cpp +@@ -168,7 +168,6 @@ uint64_t elf::getLoongArchPageDelta(uint64_t dest, uint64_t pc) { + result -= 0x10000'0000; + else if (!negativeA && negativeB) + result += 0x10000'0000; +- + return result; + } + +diff --git a/lld/test/ELF/loongarch-pc-aligned.s b/lld/test/ELF/loongarch-pc-aligned.s +index f6ac56e5261d..e7950400a5c8 100644 +--- a/lld/test/ELF/loongarch-pc-aligned.s ++++ b/lld/test/ELF/loongarch-pc-aligned.s +@@ -273,6 +273,19 @@ + # EXTREME16-NEXT: lu32i.d $t0, 0 + # EXTREME16-NEXT: lu52i.d $t0, $t0, 0 + ++## FIXME: Correct %pc64_lo20 should be 0x00000 (0) and %pc64_hi12 should be 0x000 (0), but current values are: ++## page delta = 0xffffffff80000000, page offset = 0x888 ++## %pc_lo12 = 0x888 = -1912 ++## %pc_hi20 = 0x80000 = -524288 ++## %pc64_lo20 = 0xfffff = -1 ++## %pc64_hi12 = 0xfff = -1 ++# RUN: ld.lld %t/extreme.o --section-start=.rodata=0x000071238ffff888 --section-start=.text=0x0000712310000678 -o %t/extreme17 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme17 | FileCheck %s --check-prefix=EXTREME17 ++# EXTREME17: addi.d $t0, $zero, -1912 ++# EXTREME17-NEXT: pcalau12i $t1, -524288 ++# EXTREME17-NEXT: lu32i.d $t0, -1 ++# EXTREME17-NEXT: lu52i.d $t0, $t0, -1 ++ + #--- a.s + .rodata + x: +-- +2.20.1 + diff --git a/0012-lld-LoongArch-Support-the-R_LARCH_-ADD-SUB-_ULEB128-.patch b/0012-lld-LoongArch-Support-the-R_LARCH_-ADD-SUB-_ULEB128-.patch new file mode 100644 index 0000000..ca6f70d --- 
/dev/null +++ b/0012-lld-LoongArch-Support-the-R_LARCH_-ADD-SUB-_ULEB128-.patch @@ -0,0 +1,198 @@ +From a5c1174c902a9dc7fb15aa047ca31e012aea6af9 Mon Sep 17 00:00:00 2001 +From: Jinyang He +Date: Tue, 5 Mar 2024 15:50:14 +0800 +Subject: [PATCH 12/14] [lld][LoongArch] Support the R_LARCH_{ADD,SUB}_ULEB128 + relocation types (#81133) + +For a label difference like `.uleb128 A-B`, MC generates a pair of +R_LARCH_{ADD,SUB}_ULEB128 if A-B cannot be folded as a constant. GNU +assembler generates a pair of relocations in more cases (when A or B is +in a code section with linker relaxation). It is similar to RISCV. + +R_LARCH_{ADD,SUB}_ULEB128 relocations are created by Clang and GCC in +`.gcc_except_table` and other debug sections with linker relaxation +enabled. On LoongArch, first read the buf and count the available space. +Then add or sub the value. Finally truncate the expected value and fill +it into the available space. + +(cherry picked from commit eaa9ef678c63bf392ec2d5b736605db7ea7e7338) +--- + lld/ELF/Arch/LoongArch.cpp | 19 +++++ + lld/test/ELF/loongarch-reloc-leb128.s | 102 ++++++++++++++++++++++++++ + 2 files changed, 121 insertions(+) + create mode 100644 lld/test/ELF/loongarch-reloc-leb128.s + +diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp +index 3f57a76873f9..160fab4aeba9 100644 +--- a/lld/ELF/Arch/LoongArch.cpp ++++ b/lld/ELF/Arch/LoongArch.cpp +@@ -11,6 +11,7 @@ + #include "Symbols.h" + #include "SyntheticSections.h" + #include "Target.h" ++#include "llvm/Support/LEB128.h" + + using namespace llvm; + using namespace llvm::object; +@@ -210,6 +211,16 @@ static bool isJirl(uint32_t insn) { + return (insn & 0xfc000000) == JIRL; + } + ++static void handleUleb128(uint8_t *loc, uint64_t val) { ++ const uint32_t maxcount = 1 + 64 / 7; ++ uint32_t count; ++ uint64_t orig = decodeULEB128(loc, &count); ++ if (count > maxcount) ++ errorOrWarn(getErrorLocation(loc) + "extra space for uleb128"); ++ uint64_t mask = count < maxcount ? (1ULL << 7 * count) - 1 : -1ULL; ++ encodeULEB128((orig + val) & mask, loc, count); ++} ++ + LoongArch::LoongArch() { + // The LoongArch ISA itself does not have a limit on page sizes. According to + // the ISA manual, the PS (page size) field in MTLB entries and CSR.STLBPS is +@@ -451,11 +462,13 @@ RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s, + case R_LARCH_ADD16: + case R_LARCH_ADD32: + case R_LARCH_ADD64: ++ case R_LARCH_ADD_ULEB128: + case R_LARCH_SUB6: + case R_LARCH_SUB8: + case R_LARCH_SUB16: + case R_LARCH_SUB32: + case R_LARCH_SUB64: ++ case R_LARCH_SUB_ULEB128: + // The LoongArch add/sub relocs behave like the RISCV counterparts; reuse + // the RelExpr to avoid code duplication. 
+ return R_RISCV_ADD; +@@ -670,6 +683,9 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel, + case R_LARCH_ADD64: + write64le(loc, read64le(loc) + val); + return; ++ case R_LARCH_ADD_ULEB128: ++ handleUleb128(loc, val); ++ return; + case R_LARCH_SUB6: + *loc = (*loc & 0xc0) | ((*loc - val) & 0x3f); + return; +@@ -685,6 +701,9 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel, + case R_LARCH_SUB64: + write64le(loc, read64le(loc) - val); + return; ++ case R_LARCH_SUB_ULEB128: ++ handleUleb128(loc, -val); ++ return; + + case R_LARCH_MARK_LA: + case R_LARCH_MARK_PCREL: +diff --git a/lld/test/ELF/loongarch-reloc-leb128.s b/lld/test/ELF/loongarch-reloc-leb128.s +new file mode 100644 +index 000000000000..7740ca797fca +--- /dev/null ++++ b/lld/test/ELF/loongarch-reloc-leb128.s +@@ -0,0 +1,102 @@ ++# REQUIRES: loongarch ++# RUN: rm -rf %t && split-file %s %t && cd %t ++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax a.s -o a.o ++# RUN: llvm-readobj -r -x .gcc_except_table -x .debug_rnglists -x .debug_loclists a.o | FileCheck %s --check-prefix=REL ++# RUN: ld.lld -shared --gc-sections a.o -o a.so ++# RUN: llvm-readelf -x .gcc_except_table -x .debug_rnglists -x .debug_loclists a.so | FileCheck %s ++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch32 --mattr=+relax a.s -o a32.o ++# RUN: llvm-readobj -r -x .gcc_except_table -x .debug_rnglists -x .debug_loclists a32.o | FileCheck %s --check-prefix=REL ++# RUN: ld.lld -shared --gc-sections a32.o -o a32.so ++# RUN: llvm-readelf -x .gcc_except_table -x .debug_rnglists -x .debug_loclists a32.so | FileCheck %s ++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch32 --mattr=+relax extraspace.s -o extraspace32.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax extraspace.s -o extraspace64.o ++# RUN: not ld.lld -shared extraspace32.o 2>&1 | FileCheck %s --check-prefix=ERROR ++# RUN: not ld.lld -shared extraspace64.o 2>&1 | FileCheck %s --check-prefix=ERROR ++# ERROR: error: extraspace{{.*}}.o:(.rodata+0x0): extra space for uleb128 ++ ++#--- a.s ++.cfi_startproc ++.cfi_lsda 0x1b,.LLSDA0 ++.cfi_endproc ++ ++.section .text.w,"axR" ++break 0; break 0; break 0; w1: ++ .p2align 4 # 4 bytes after relaxation ++w2: break 0 ++ ++.section .text.x,"ax" ++break 0; break 0; break 0; x1: ++ .p2align 4 # 4 bytes after relaxation ++x2: break 0 ++ ++.section .gcc_except_table,"a" ++.LLSDA0: ++.uleb128 w2-w1+116 # initial value: 0x0080 ++.uleb128 w1-w2+141 # initial value: 0x0080 ++.uleb128 w2-w1+16372 # initial value: 0x008080 ++.uleb128 w1-w2+16397 # initial value: 0x008080 ++.uleb128 w2-w1+2097140 # initial value: 0x00808080 ++.uleb128 w1-w2+2097165 # initial value: 0x00808080 ++ ++.section .debug_rnglists ++.uleb128 w2-w1+116 # initial value: 0x0080 ++.uleb128 w1-w2+141 # initial value: 0x0080 ++.uleb128 w2-w1+16372 # initial value: 0x008080 ++.uleb128 w1-w2+16397 # initial value: 0x008080 ++.uleb128 w2-w1+2097140 # initial value: 0x00808080 ++.uleb128 w1-w2+2097165 # initial value: 0x00808080 ++ ++.section .debug_loclists ++.uleb128 x2-x1 # references discarded symbols ++ ++# REL: Section ({{.*}}) .rela.debug_rnglists { ++# REL-NEXT: 0x0 R_LARCH_ADD_ULEB128 w2 0x74 ++# REL-NEXT: 0x0 R_LARCH_SUB_ULEB128 w1 0x0 ++# REL-NEXT: 0x2 R_LARCH_ADD_ULEB128 w1 0x8D ++# REL-NEXT: 0x2 R_LARCH_SUB_ULEB128 w2 0x0 ++# REL-NEXT: 0x4 R_LARCH_ADD_ULEB128 w2 0x3FF4 ++# REL-NEXT: 0x4 R_LARCH_SUB_ULEB128 w1 0x0 ++# REL-NEXT: 0x7 R_LARCH_ADD_ULEB128 w1 0x400D ++# REL-NEXT: 0x7 R_LARCH_SUB_ULEB128 w2 0x0 ++# REL-NEXT: 0xA 
R_LARCH_ADD_ULEB128 w2 0x1FFFF4 ++# REL-NEXT: 0xA R_LARCH_SUB_ULEB128 w1 0x0 ++# REL-NEXT: 0xE R_LARCH_ADD_ULEB128 w1 0x20000D ++# REL-NEXT: 0xE R_LARCH_SUB_ULEB128 w2 0x0 ++# REL-NEXT: } ++# REL: Section ({{.*}}) .rela.debug_loclists { ++# REL-NEXT: 0x0 R_LARCH_ADD_ULEB128 x2 0x0 ++# REL-NEXT: 0x0 R_LARCH_SUB_ULEB128 x1 0x0 ++# REL-NEXT: } ++ ++# REL: Hex dump of section '.gcc_except_table': ++# REL-NEXT: 0x00000000 80008000 80800080 80008080 80008080 . ++# REL-NEXT: 0x00000010 8000 . ++# REL: Hex dump of section '.debug_rnglists': ++# REL-NEXT: 0x00000000 80008000 80800080 80008080 80008080 . ++# REL-NEXT: 0x00000010 8000 . ++# REL: Hex dump of section '.debug_loclists': ++# REL-NEXT: 0x00000000 00 . ++ ++# CHECK: Hex dump of section '.gcc_except_table': ++# CHECK-NEXT: 0x[[#%x,]] f8008901 f8ff0089 8001f8ff ff008980 . ++# CHECK-NEXT: 0x[[#%x,]] 8001 . ++# CHECK: Hex dump of section '.debug_rnglists': ++# CHECK-NEXT: 0x00000000 f8008901 f8ff0089 8001f8ff ff008980 . ++# CHECK-NEXT: 0x00000010 8001 . ++# CHECK: Hex dump of section '.debug_loclists': ++# CHECK-NEXT: 0x00000000 0c . ++ ++#--- extraspace.s ++.text ++w1: ++ la.pcrel $t0, w1 ++w2: ++ ++.rodata ++.reloc ., R_LARCH_ADD_ULEB128, w2 ++.reloc ., R_LARCH_SUB_ULEB128, w1 ++.fill 10, 1, 0x80 ++.byte 0 +-- +2.20.1 + diff --git a/0013-Clang-LoongArch-Support-compiler-options-mlsx-mlasx-.patch b/0013-Clang-LoongArch-Support-compiler-options-mlsx-mlasx-.patch new file mode 100644 index 0000000..4e117ab --- /dev/null +++ b/0013-Clang-LoongArch-Support-compiler-options-mlsx-mlasx-.patch @@ -0,0 +1,350 @@ +From c7f4dc3b39fc0519884ff3553bbdc51627211d7e Mon Sep 17 00:00:00 2001 +From: licongtian +Date: Wed, 20 Sep 2023 11:21:56 +0800 +Subject: [PATCH 13/42] [Clang][LoongArch] Support compiler options + -mlsx/-mlasx for clang + +This patch adds compiler options -mlsx/-mlasx which enables the +instruction sets of LSX and LASX, and sets related predefined macros +according to the options. 
+ +(cherry picked from commit 8d4e35600f3ba90997a59fdb9baeb196e723eec9) + +--- + .../clang/Basic/DiagnosticDriverKinds.td | 6 +++ + clang/include/clang/Driver/Options.td | 10 +++++ + clang/lib/Basic/Targets/LoongArch.cpp | 12 +++++- + clang/lib/Basic/Targets/LoongArch.h | 4 ++ + .../lib/Driver/ToolChains/Arch/LoongArch.cpp | 32 +++++++++++++++ + clang/test/Driver/loongarch-mlasx-error.c | 15 +++++++ + clang/test/Driver/loongarch-mlasx.c | 37 +++++++++++++++++ + clang/test/Driver/loongarch-mlsx-error.c | 12 ++++++ + clang/test/Driver/loongarch-mlsx.c | 41 +++++++++++++++++++ + clang/test/Preprocessor/init-loongarch.c | 35 ++++++++++++++++ + 10 files changed, 203 insertions(+), 1 deletion(-) + create mode 100644 clang/test/Driver/loongarch-mlasx-error.c + create mode 100644 clang/test/Driver/loongarch-mlasx.c + create mode 100644 clang/test/Driver/loongarch-mlsx-error.c + create mode 100644 clang/test/Driver/loongarch-mlsx.c + +diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td +index 1b69324d073a..8c751f2c4bda 100644 +--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td ++++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td +@@ -732,6 +732,12 @@ def warn_drv_loongarch_conflicting_implied_val : Warning< + InGroup; + def err_drv_loongarch_invalid_mfpu_EQ : Error< + "invalid argument '%0' to -mfpu=; must be one of: 64, 32, none, 0 (alias for none)">; ++def err_drv_loongarch_wrong_fpu_width_for_lsx : Error< ++ "wrong fpu width; LSX depends on 64-bit FPU.">; ++def err_drv_loongarch_wrong_fpu_width_for_lasx : Error< ++ "wrong fpu width; LASX depends on 64-bit FPU.">; ++def err_drv_loongarch_invalid_simd_option_combination : Error< ++ "invalid option combination; LASX depends on LSX.">; + + def err_drv_expand_response_file : Error< + "failed to expand response file: %0">; +diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td +index 37e8c56b2d29..c94979b4dc34 100644 +--- a/clang/include/clang/Driver/Options.td ++++ b/clang/include/clang/Driver/Options.td +@@ -192,6 +192,8 @@ def m_x86_Features_Group : OptionGroup<"">, + Group, Flags<[CoreOption]>, DocName<"X86">; + def m_riscv_Features_Group : OptionGroup<"">, + Group, DocName<"RISC-V">; ++def m_loongarch_Features_Group : OptionGroup<"">, ++ Group, DocName<"LoongArch">; + + def m_libc_Group : OptionGroup<"">, Group, + Flags<[HelpHidden]>; +@@ -4190,6 +4192,14 @@ def mstack_protector_guard_reg_EQ : Joined<["-"], "mstack-protector-guard-reg="> + def mfentry : Flag<["-"], "mfentry">, HelpText<"Insert calls to fentry at function entry (x86/SystemZ only)">, + Flags<[CC1Option]>, Group, + MarshallingInfoFlag>; ++def mlsx : Flag<["-"], "mlsx">, Group, ++ HelpText<"Enable Loongson SIMD Extension (LSX).">; ++def mno_lsx : Flag<["-"], "mno-lsx">, Group, ++ HelpText<"Disable Loongson SIMD Extension (LSX).">; ++def mlasx : Flag<["-"], "mlasx">, Group, ++ HelpText<"Enable Loongson Advanced SIMD Extension (LASX).">; ++def mno_lasx : Flag<["-"], "mno-lasx">, Group, ++ HelpText<"Disable Loongson Advanced SIMD Extension (LASX).">; + def mnop_mcount : Flag<["-"], "mnop-mcount">, HelpText<"Generate mcount/__fentry__ calls as nops. 
To activate they need to be patched in.">, + Flags<[CC1Option]>, Group, + MarshallingInfoFlag>; +diff --git a/clang/lib/Basic/Targets/LoongArch.cpp b/clang/lib/Basic/Targets/LoongArch.cpp +index 4448a2ae10a1..88537989a051 100644 +--- a/clang/lib/Basic/Targets/LoongArch.cpp ++++ b/clang/lib/Basic/Targets/LoongArch.cpp +@@ -208,6 +208,11 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts, + TuneCPU = ArchName; + Builder.defineMacro("__loongarch_tune", Twine('"') + TuneCPU + Twine('"')); + ++ if (HasFeatureLSX) ++ Builder.defineMacro("__loongarch_sx", Twine(1)); ++ if (HasFeatureLASX) ++ Builder.defineMacro("__loongarch_asx", Twine(1)); ++ + StringRef ABI = getABI(); + if (ABI == "lp64d" || ABI == "lp64f" || ABI == "lp64s") + Builder.defineMacro("__loongarch_lp64"); +@@ -257,6 +262,8 @@ bool LoongArchTargetInfo::hasFeature(StringRef Feature) const { + .Case("loongarch64", Is64Bit) + .Case("32bit", !Is64Bit) + .Case("64bit", Is64Bit) ++ .Case("lsx", HasFeatureLSX) ++ .Case("lasx", HasFeatureLASX) + .Default(false); + } + +@@ -274,7 +281,10 @@ bool LoongArchTargetInfo::handleTargetFeatures( + if (Feature == "+d") { + HasFeatureD = true; + } +- } ++ } else if (Feature == "+lsx") ++ HasFeatureLSX = true; ++ else if (Feature == "+lasx") ++ HasFeatureLASX = true; + } + return true; + } +diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h +index 34143f462a24..8f4150b2539d 100644 +--- a/clang/lib/Basic/Targets/LoongArch.h ++++ b/clang/lib/Basic/Targets/LoongArch.h +@@ -27,12 +27,16 @@ protected: + std::string CPU; + bool HasFeatureD; + bool HasFeatureF; ++ bool HasFeatureLSX; ++ bool HasFeatureLASX; + + public: + LoongArchTargetInfo(const llvm::Triple &Triple, const TargetOptions &) + : TargetInfo(Triple) { + HasFeatureD = false; + HasFeatureF = false; ++ HasFeatureLSX = false; ++ HasFeatureLASX = false; + LongDoubleWidth = 128; + LongDoubleAlign = 128; + LongDoubleFormat = &llvm::APFloat::IEEEquad(); +diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp +index 65925e9ed610..31153a67ad28 100644 +--- a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp ++++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp +@@ -175,6 +175,38 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D, + A->ignoreTargetSpecific(); + if (Arg *A = Args.getLastArgNoClaim(options::OPT_mfpu_EQ)) + A->ignoreTargetSpecific(); ++ ++ // Select lsx feature determined by -m[no-]lsx. ++ if (const Arg *A = Args.getLastArg(options::OPT_mlsx, options::OPT_mno_lsx)) { ++ // LSX depends on 64-bit FPU. ++ // -m*-float and -mfpu=none/0/32 conflict with -mlsx. ++ if (A->getOption().matches(options::OPT_mlsx)) { ++ if (llvm::find(Features, "-d") != Features.end()) ++ D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lsx); ++ else /*-mlsx*/ ++ Features.push_back("+lsx"); ++ } else /*-mno-lsx*/ { ++ Features.push_back("-lsx"); ++ } ++ } ++ ++ // Select lasx feature determined by -m[no-]lasx. ++ if (const Arg *A = ++ Args.getLastArg(options::OPT_mlasx, options::OPT_mno_lasx)) { ++ // LASX depends on 64-bit FPU and LSX. ++ // -mno-lsx conflicts with -mlasx. 
++ if (A->getOption().matches(options::OPT_mlasx)) { ++ if (llvm::find(Features, "-d") != Features.end()) ++ D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lasx); ++ else if (llvm::find(Features, "-lsx") != Features.end()) ++ D.Diag(diag::err_drv_loongarch_invalid_simd_option_combination); ++ else { /*-mlasx*/ ++ Features.push_back("+lsx"); ++ Features.push_back("+lasx"); ++ } ++ } else /*-mno-lasx*/ ++ Features.push_back("-lasx"); ++ } + } + + std::string loongarch::postProcessTargetCPUString(const std::string &CPU, +diff --git a/clang/test/Driver/loongarch-mlasx-error.c b/clang/test/Driver/loongarch-mlasx-error.c +new file mode 100644 +index 000000000000..e66f277f7c29 +--- /dev/null ++++ b/clang/test/Driver/loongarch-mlasx-error.c +@@ -0,0 +1,15 @@ ++// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -msingle-float 2>&1 \ ++// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s ++// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -msoft-float 2>&1 \ ++// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s ++// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -mfpu=32 2>&1 \ ++// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s ++// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -mfpu=0 2>&1 \ ++// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s ++// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -mfpu=none 2>&1 \ ++// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s ++// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -mno-lsx 2>&1 \ ++// RUN: FileCheck --check-prefix=ERROR_LASX_FPU128 %s ++ ++// ERROR_LASX_FPU64: error: wrong fpu width; LASX depends on 64-bit FPU. ++// ERROR_LASX_FPU128: error: invalid option combination; LASX depends on LSX. +diff --git a/clang/test/Driver/loongarch-mlasx.c b/clang/test/Driver/loongarch-mlasx.c +new file mode 100644 +index 000000000000..0b934f125c9e +--- /dev/null ++++ b/clang/test/Driver/loongarch-mlasx.c +@@ -0,0 +1,37 @@ ++/// Test -m[no-]lasx options. 
++ ++// RUN: %clang --target=loongarch64 -mlasx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-LASX ++// RUN: %clang --target=loongarch64 -mno-lasx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-NOLASX ++// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-NOLASX ++// RUN: %clang --target=loongarch64 -mno-lasx -mlasx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-LASX ++// RUN: %clang --target=loongarch64 -mlsx -mlasx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-LASX ++// RUN: %clang --target=loongarch64 -mlasx -mlsx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-LASX ++ ++// RUN: %clang --target=loongarch64 -mlasx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-LASX ++// RUN: %clang --target=loongarch64 -mno-lasx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-NOLASX ++// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-NOLASX ++// RUN: %clang --target=loongarch64 -mno-lasx -mlasx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-LASX ++// RUN: %clang --target=loongarch64 -mlsx -mlasx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-LASX ++// RUN: %clang --target=loongarch64 -mlasx -mlsx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-LASX ++ ++// CC1-LASX: "-target-feature" "+lsx" "-target-feature" "+lasx" ++// CC1-NOLASX: "-target-feature" "-lasx" ++ ++// IR-LASX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+lasx{{(,.*)?}}" ++// IR-NOLASX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-lasx{{(,.*)?}}" ++ ++int foo(void){ ++ return 3; ++} +diff --git a/clang/test/Driver/loongarch-mlsx-error.c b/clang/test/Driver/loongarch-mlsx-error.c +new file mode 100644 +index 000000000000..bd6b8e2718bf +--- /dev/null ++++ b/clang/test/Driver/loongarch-mlsx-error.c +@@ -0,0 +1,12 @@ ++// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -msingle-float 2>&1 \ ++// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s ++// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -msoft-float 2>&1 \ ++// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s ++// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -mfpu=32 2>&1 \ ++// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s ++// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -mfpu=0 2>&1 \ ++// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s ++// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -mfpu=none 2>&1 \ ++// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s ++ ++// ERROR_LSX_FPU64: error: wrong fpu width; LSX depends on 64-bit FPU. +diff --git a/clang/test/Driver/loongarch-mlsx.c b/clang/test/Driver/loongarch-mlsx.c +new file mode 100644 +index 000000000000..7d4307b078e1 +--- /dev/null ++++ b/clang/test/Driver/loongarch-mlsx.c +@@ -0,0 +1,41 @@ ++/// Test -m[no-]lsx options. 
++ ++// RUN: %clang --target=loongarch64 -mlsx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-LSX ++// RUN: %clang --target=loongarch64 -mno-lsx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-NOLSX ++// RUN: %clang --target=loongarch64 -mlsx -mno-lsx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-NOLSX ++// RUN: %clang --target=loongarch64 -mno-lsx -mlsx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-LSX ++// RUN: %clang --target=loongarch64 -mlsx -mno-lasx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-LSX ++// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-LSX ++// RUN: %clang --target=loongarch64 -mno-lsx -mno-lasx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-NOLSX ++ ++// RUN: %clang --target=loongarch64 -mlsx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-LSX ++// RUN: %clang --target=loongarch64 -mno-lsx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-NOLSX ++// RUN: %clang --target=loongarch64 -mlsx -mno-lsx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-NOLSX ++// RUN: %clang --target=loongarch64 -mno-lsx -mlsx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-LSX ++// RUN: %clang --target=loongarch64 -mlsx -mno-lasx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-LSX ++// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-LSX ++// RUN: %clang --target=loongarch64 -mno-lsx -mno-lasx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-NOLSX ++ ++// CC1-LSX: "-target-feature" "+lsx" ++// CC1-NOLSX: "-target-feature" "-lsx" ++ ++// IR-LSX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+lsx{{(,.*)?}}" ++// IR-NOLSX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-lsx{{(,.*)?}}" ++ ++int foo(void){ ++ return 3; ++} +diff --git a/clang/test/Preprocessor/init-loongarch.c b/clang/test/Preprocessor/init-loongarch.c +index 4ef42a921ec0..e235a7283021 100644 +--- a/clang/test/Preprocessor/init-loongarch.c ++++ b/clang/test/Preprocessor/init-loongarch.c +@@ -807,3 +807,38 @@ + + // ARCH-TUNE: #define __loongarch_arch "[[ARCH]]" + // ARCH-TUNE: #define __loongarch_tune "[[TUNE]]" ++ ++// RUN: %clang --target=loongarch64 -mlsx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s ++// RUN: %clang --target=loongarch64 -mno-lsx -mlsx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s ++// RUN: %clang --target=loongarch64 -mlsx -mno-lasx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s ++// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s ++// MLSX-NOT: #define __loongarch_asx ++// MLSX: #define __loongarch_sx 1 ++ ++// RUN: %clang --target=loongarch64 -mlasx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s ++// RUN: %clang --target=loongarch64 -mno-lasx -mlasx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s ++// RUN: %clang --target=loongarch64 -mlsx -mlasx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s ++// RUN: %clang --target=loongarch64 -mlasx -mlsx -x c -E -dM %s -o - \ ++// 
RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s ++// MLASX: #define __loongarch_asx 1 ++// MLASX: #define __loongarch_sx 1 ++ ++// RUN: %clang --target=loongarch64 -mno-lsx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s ++// RUN: %clang --target=loongarch64 -mlsx -mno-lsx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s ++// RUN: %clang --target=loongarch64 -mno-lsx -mno-lasx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s ++// RUN: %clang --target=loongarch64 -mno-lasx -mno-lsx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s ++// RUN: %clang --target=loongarch64 -mno-lasx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s ++// MNO-LSX-NOT: #define __loongarch_asx ++// MNO-LSX-NOT: #define __loongarch_sx +-- +2.20.1 + diff --git a/0013-LoongArch-Fix-ABI-mismatch-with-g-when-handling-empt.patch b/0013-LoongArch-Fix-ABI-mismatch-with-g-when-handling-empt.patch new file mode 100644 index 0000000..de3bcce --- /dev/null +++ b/0013-LoongArch-Fix-ABI-mismatch-with-g-when-handling-empt.patch @@ -0,0 +1,68 @@ +From 313138921cd84f1be06e2ecbf0f6e854242bb8b9 Mon Sep 17 00:00:00 2001 +From: Lu Weining +Date: Sat, 4 Nov 2023 10:04:37 +0800 +Subject: [PATCH 13/27] [LoongArch] Fix ABI mismatch with g++ when handling + empty unions (#71025) + +In g++, empty unions are not ignored like empty structs when flattening +structs to examine whether the structs can be passed via FARs in C++. +This patch aligns clang++ with g++. + +Fix https://github.com/llvm/llvm-project/issues/70890. + +(cherry picked from commit 4253fdc2c462da61cc0deb74a43265665720c828) +--- + clang/lib/CodeGen/Targets/LoongArch.cpp | 7 ++++--- + clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c | 2 +- + clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c | 3 +-- + 3 files changed, 6 insertions(+), 6 deletions(-) + +diff --git a/clang/lib/CodeGen/Targets/LoongArch.cpp b/clang/lib/CodeGen/Targets/LoongArch.cpp +index bc508a99da9c..63b9a1fdb988 100644 +--- a/clang/lib/CodeGen/Targets/LoongArch.cpp ++++ b/clang/lib/CodeGen/Targets/LoongArch.cpp +@@ -170,10 +170,11 @@ bool LoongArchABIInfo::detectFARsEligibleStructHelper( + // copy constructor are not eligible for the FP calling convention. + if (getRecordArgABI(Ty, CGT.getCXXABI())) + return false; +- if (isEmptyRecord(getContext(), Ty, true, true)) +- return true; + const RecordDecl *RD = RTy->getDecl(); +- // Unions aren't eligible unless they're empty (which is caught above). ++ if (isEmptyRecord(getContext(), Ty, true, true) && ++ (!RD->isUnion() || !isa(RD))) ++ return true; ++ // Unions aren't eligible unless they're empty in C (which is caught above). 
+ if (RD->isUnion()) + return false; + const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); +diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c +index 281b7b15841a..2f7596f0ebdc 100644 +--- a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c ++++ b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c +@@ -3,7 +3,7 @@ + // RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +d -target-abi lp64d -emit-llvm %s -o - -x c++ | \ + // RUN: FileCheck --check-prefix=CHECK-CXX %s + +-// Fields containing empty structs or unions are ignored when flattening ++// Fields containing empty structs are ignored when flattening + // structs to examine whether the structs can be passed via FARs, even in C++. + // But there is an exception that non-zero-length array of empty structures are + // not ignored in C++. These rules are not documented in psABI +diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c +index b0607425336e..363e37efb646 100644 +--- a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c ++++ b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c +@@ -19,8 +19,7 @@ struct s1 { + }; + + // CHECK-C: define{{.*}} { i32, float } @test2(i32{{[^,]*}}, float{{[^,]*}}) +-/// FIXME: This doesn't match g++. +-// CHECK-CXX: define{{.*}} { i32, float } @_Z5test22s1(i32{{[^,]*}}, float{{[^,]*}}) ++// CHECK-CXX: define{{.*}} [2 x i64] @_Z5test22s1([2 x i64]{{[^,]*}}) + struct s1 test2(struct s1 a) { + return a; + } +-- +2.20.1 + diff --git a/0009-Backport-MC-test-Change-ELF-uleb-ehtable.s-Mach-O-to-use-private-symbols-in-.uleb128-for-label-differences.patch b/0013-MC-test-Change-ELF-uleb-ehtable.s-Mach-O-to-use-priv.patch similarity index 97% rename from 0009-Backport-MC-test-Change-ELF-uleb-ehtable.s-Mach-O-to-use-private-symbols-in-.uleb128-for-label-differences.patch rename to 0013-MC-test-Change-ELF-uleb-ehtable.s-Mach-O-to-use-priv.patch index 94bb772..7cedd59 100644 --- a/0009-Backport-MC-test-Change-ELF-uleb-ehtable.s-Mach-O-to-use-private-symbols-in-.uleb128-for-label-differences.patch +++ b/0013-MC-test-Change-ELF-uleb-ehtable.s-Mach-O-to-use-priv.patch @@ -8,7 +8,6 @@ On Mach-O, `.uleb128 A-B` where A and B are separated by a non-private symbol is (see D153167). (cherry picked from commit 0a89bda4a8b756a00985e0965f7686b5ceb43295) -Change-Id: I92ed11d6913b8c781e29be6e8c642cf0a371910d --- llvm/test/MC/ELF/uleb-ehtable.s | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/0013-lld-LoongArch-Handle-extreme-code-model-relocs-accor.patch b/0013-lld-LoongArch-Handle-extreme-code-model-relocs-accor.patch new file mode 100644 index 0000000..f80f7b7 --- /dev/null +++ b/0013-lld-LoongArch-Handle-extreme-code-model-relocs-accor.patch @@ -0,0 +1,443 @@ +From 2a30bd5bf5ce238b5677943dd93d26c5c3ebc08d Mon Sep 17 00:00:00 2001 +From: Lu Weining +Date: Wed, 10 Jan 2024 18:03:52 +0800 +Subject: [PATCH 13/23] [lld][LoongArch] Handle extreme code model relocs + according to psABI v2.30 (#73387) + +psABI v2.30 requires the extreme code model instructions sequence +(pcalau12i+addi.d+lu32i.d+lu52i.d) to be adjacent. + +See https://github.com/llvm/llvm-project/pull/71907 and +https://github.com/loongson-community/discussions/issues/17 for details. 
+ +(cherry picked from commit 38394a3d0b8b9a1fdc444bdebeba17a19250997d) +--- + lld/ELF/Arch/LoongArch.cpp | 110 +++++++--------------------- + lld/ELF/InputSection.cpp | 10 +-- + lld/ELF/Target.h | 2 +- + lld/test/ELF/loongarch-pc-aligned.s | 109 ++++++++++++++------------- + 4 files changed, 93 insertions(+), 138 deletions(-) + +diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp +index 516d02bb9e3f..19147a0f6df6 100644 +--- a/lld/ELF/Arch/LoongArch.cpp ++++ b/lld/ELF/Arch/LoongArch.cpp +@@ -85,89 +85,33 @@ static uint64_t getLoongArchPage(uint64_t p) { + static uint32_t lo12(uint32_t val) { return val & 0xfff; } + + // Calculate the adjusted page delta between dest and PC. +-uint64_t elf::getLoongArchPageDelta(uint64_t dest, uint64_t pc) { +- // Consider the large code model access pattern, of which the smaller code +- // models' access patterns are a subset: +- // +- // pcalau12i U, %foo_hi20(sym) ; b in [-0x80000, 0x7ffff] +- // addi.d T, zero, %foo_lo12(sym) ; a in [-0x800, 0x7ff] +- // lu32i.d T, %foo64_lo20(sym) ; c in [-0x80000, 0x7ffff] +- // lu52i.d T, T, %foo64_hi12(sym) ; d in [-0x800, 0x7ff] +- // {ldx,stx,add}.* dest, U, T +- // +- // Let page(pc) = 0xRRR'QQQQQ'PPPPP'000 and dest = 0xZZZ'YYYYY'XXXXX'AAA, +- // with RQ, P, ZY, X and A representing the respective bitfields as unsigned +- // integers. We have: +- // +- // page(dest) = 0xZZZ'YYYYY'XXXXX'000 +- // - page(pc) = 0xRRR'QQQQQ'PPPPP'000 +- // ---------------------------------- +- // 0xddd'ccccc'bbbbb'000 +- // +- // Now consider the above pattern's actual effects: +- // +- // page(pc) 0xRRR'QQQQQ'PPPPP'000 +- // pcalau12i + 0xiii'iiiii'bbbbb'000 +- // addi + 0xjjj'jjjjj'kkkkk'AAA +- // lu32i.d & lu52i.d + 0xddd'ccccc'00000'000 +- // -------------------------------------------------- +- // dest = U + T +- // = ((RQ<<32) + (P<<12) + i + (b<<12)) + (j + k + A + (cd<<32)) +- // = (((RQ+cd)<<32) + i + j) + (((P+b)<<12) + k) + A +- // = (ZY<<32) + (X<<12) + A +- // +- // ZY<<32 = (RQ<<32)+(cd<<32)+i+j, X<<12 = (P<<12)+(b<<12)+k +- // cd<<32 = (ZY<<32)-(RQ<<32)-i-j, b<<12 = (X<<12)-(P<<12)-k +- // +- // where i and k are terms representing the effect of b's and A's sign +- // extension respectively. +- // +- // i = signed b < 0 ? -0x10000'0000 : 0 +- // k = signed A < 0 ? -0x1000 : 0 +- // +- // The j term is a bit complex: it represents the higher half of +- // sign-extended bits from A that are effectively lost if i == 0 but k != 0, +- // due to overwriting by lu32i.d & lu52i.d. +- // +- // j = signed A < 0 && signed b >= 0 ? 0x10000'0000 : 0 +- // +- // The actual effect of the instruction sequence before the final addition, +- // i.e. 
our desired result value, is thus: +- // +- // result = (cd<<32) + (b<<12) +- // = (ZY<<32)-(RQ<<32)-i-j + (X<<12)-(P<<12)-k +- // = ((ZY<<32)+(X<<12)) - ((RQ<<32)+(P<<12)) - i - j - k +- // = page(dest) - page(pc) - i - j - k +- // +- // when signed A >= 0 && signed b >= 0: +- // +- // i = j = k = 0 +- // result = page(dest) - page(pc) +- // +- // when signed A >= 0 && signed b < 0: +- // +- // i = -0x10000'0000, j = k = 0 +- // result = page(dest) - page(pc) + 0x10000'0000 +- // +- // when signed A < 0 && signed b >= 0: +- // +- // i = 0, j = 0x10000'0000, k = -0x1000 +- // result = page(dest) - page(pc) - 0x10000'0000 + 0x1000 +- // +- // when signed A < 0 && signed b < 0: +- // +- // i = -0x10000'0000, j = 0, k = -0x1000 +- // result = page(dest) - page(pc) + 0x1000 +- uint64_t result = getLoongArchPage(dest) - getLoongArchPage(pc); +- bool negativeA = lo12(dest) > 0x7ff; +- bool negativeB = (result & 0x8000'0000) != 0; +- +- if (negativeA) +- result += 0x1000; +- if (negativeA && !negativeB) +- result -= 0x10000'0000; +- else if (!negativeA && negativeB) +- result += 0x10000'0000; ++uint64_t elf::getLoongArchPageDelta(uint64_t dest, uint64_t pc, RelType type) { ++ // Note that if the sequence being relocated is `pcalau12i + addi.d + lu32i.d ++ // + lu52i.d`, they must be adjancent so that we can infer the PC of ++ // `pcalau12i` when calculating the page delta for the other two instructions ++ // (lu32i.d and lu52i.d). Compensate all the sign-extensions is a bit ++ // complicated. Just use psABI recommended algorithm. ++ uint64_t pcalau12i_pc; ++ switch (type) { ++ case R_LARCH_PCALA64_LO20: ++ case R_LARCH_GOT64_PC_LO20: ++ case R_LARCH_TLS_IE64_PC_LO20: ++ pcalau12i_pc = pc - 8; ++ break; ++ case R_LARCH_PCALA64_HI12: ++ case R_LARCH_GOT64_PC_HI12: ++ case R_LARCH_TLS_IE64_PC_HI12: ++ pcalau12i_pc = pc - 12; ++ break; ++ default: ++ pcalau12i_pc = pc; ++ break; ++ } ++ uint64_t result = getLoongArchPage(dest) - getLoongArchPage(pcalau12i_pc); ++ if (dest & 0x800) ++ result += 0x1000 - 0x1'0000'0000; ++ if (result & 0x8000'0000) ++ result += 0x1'0000'0000; + return result; + } + +diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp +index b178d82407e3..44444b62251d 100644 +--- a/lld/ELF/InputSection.cpp ++++ b/lld/ELF/InputSection.cpp +@@ -712,8 +712,8 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, + return sym.getGotVA() + a - p; + case R_LOONGARCH_GOT_PAGE_PC: + if (sym.hasFlag(NEEDS_TLSGD)) +- return getLoongArchPageDelta(in.got->getGlobalDynAddr(sym) + a, p); +- return getLoongArchPageDelta(sym.getGotVA() + a, p); ++ return getLoongArchPageDelta(in.got->getGlobalDynAddr(sym) + a, p, type); ++ return getLoongArchPageDelta(sym.getGotVA() + a, p, type); + case R_MIPS_GOTREL: + return sym.getVA(a) - in.mipsGot->getGp(file); + case R_MIPS_GOT_GP: +@@ -763,7 +763,7 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, + return 0; + } + case R_LOONGARCH_PAGE_PC: +- return getLoongArchPageDelta(sym.getVA(a), p); ++ return getLoongArchPageDelta(sym.getVA(a), p, type); + case R_PC: + case R_ARM_PCA: { + uint64_t dest; +@@ -798,7 +798,7 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, + case R_PPC64_CALL_PLT: + return sym.getPltVA() + a - p; + case R_LOONGARCH_PLT_PAGE_PC: +- return getLoongArchPageDelta(sym.getPltVA() + a, p); ++ return getLoongArchPageDelta(sym.getPltVA() + a, p, type); + case R_PLT_GOTPLT: + return sym.getPltVA() + a - in.gotPlt->getVA(); + case 
R_PPC32_PLTREL: +@@ -860,7 +860,7 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, + case R_TLSGD_PC: + return in.got->getGlobalDynAddr(sym) + a - p; + case R_LOONGARCH_TLSGD_PAGE_PC: +- return getLoongArchPageDelta(in.got->getGlobalDynAddr(sym) + a, p); ++ return getLoongArchPageDelta(in.got->getGlobalDynAddr(sym) + a, p, type); + case R_TLSLD_GOTPLT: + return in.got->getVA() + in.got->getTlsIndexOff() + a - in.gotPlt->getVA(); + case R_TLSLD_GOT: +diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h +index bf831afa1793..aeabe47f92a1 100644 +--- a/lld/ELF/Target.h ++++ b/lld/ELF/Target.h +@@ -229,7 +229,7 @@ void addPPC64SaveRestore(); + uint64_t getPPC64TocBase(); + uint64_t getAArch64Page(uint64_t expr); + template void writeARMCmseImportLib(); +-uint64_t getLoongArchPageDelta(uint64_t dest, uint64_t pc); ++uint64_t getLoongArchPageDelta(uint64_t dest, uint64_t pc, RelType type); + void riscvFinalizeRelax(int passes); + void mergeRISCVAttributesSections(); + void addArmInputSectionMappingSymbols(); +diff --git a/lld/test/ELF/loongarch-pc-aligned.s b/lld/test/ELF/loongarch-pc-aligned.s +index e7950400a5c8..0405961e5f74 100644 +--- a/lld/test/ELF/loongarch-pc-aligned.s ++++ b/lld/test/ELF/loongarch-pc-aligned.s +@@ -75,8 +75,8 @@ + ## %pc64_hi12 = 0x444 = 1092 + # RUN: ld.lld %t/extreme.o --section-start=.rodata=0x4443333334567111 --section-start=.text=0x0000000012345678 -o %t/extreme0 + # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme0 | FileCheck %s --check-prefix=EXTREME0 +-# EXTREME0: addi.d $t0, $zero, 273 +-# EXTREME0-NEXT: pcalau12i $t1, 139810 ++# EXTREME0: pcalau12i $t1, 139810 ++# EXTREME0-NEXT: addi.d $t0, $zero, 273 + # EXTREME0-NEXT: lu32i.d $t0, 209715 + # EXTREME0-NEXT: lu52i.d $t0, $t0, 1092 + +@@ -87,8 +87,8 @@ + ## %pc64_hi12 = 0x444 = 1092 + # RUN: ld.lld %t/extreme.o --section-start=.rodata=0x4443333334567888 --section-start=.text=0x0000000012345678 -o %t/extreme1 + # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme1 | FileCheck %s --check-prefix=EXTREME1 +-# EXTREME1: addi.d $t0, $zero, -1912 +-# EXTREME1-NEXT: pcalau12i $t1, 139811 ++# EXTREME1: pcalau12i $t1, 139811 ++# EXTREME1-NEXT: addi.d $t0, $zero, -1912 + # EXTREME1-NEXT: lu32i.d $t0, 209714 + # EXTREME1-NEXT: lu52i.d $t0, $t0, 1092 + +@@ -99,8 +99,8 @@ + ## %pc64_hi12 = 0x444 = 1092 + # RUN: ld.lld %t/extreme.o --section-start=.rodata=0x44433333abcde111 --section-start=.text=0x0000000012345678 -o %t/extreme2 + # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme2 | FileCheck %s --check-prefix=EXTREME2 +-# EXTREME2: addi.d $t0, $zero, 273 +-# EXTREME2-NEXT: pcalau12i $t1, -419431 ++# EXTREME2: pcalau12i $t1, -419431 ++# EXTREME2-NEXT: addi.d $t0, $zero, 273 + # EXTREME2-NEXT: lu32i.d $t0, 209716 + # EXTREME2-NEXT: lu52i.d $t0, $t0, 1092 + +@@ -111,8 +111,8 @@ + ## %pc64_hi12 = 0x444 = 1092 + # RUN: ld.lld %t/extreme.o --section-start=.rodata=0x44433333abcde888 --section-start=.text=0x0000000012345678 -o %t/extreme3 + # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme3 | FileCheck %s --check-prefix=EXTREME3 +-# EXTREME3: addi.d $t0, $zero, -1912 +-# EXTREME3-NEXT: pcalau12i $t1, -419430 ++# EXTREME3: pcalau12i $t1, -419430 ++# EXTREME3-NEXT: addi.d $t0, $zero, -1912 + # EXTREME3-NEXT: lu32i.d $t0, 209715 + # EXTREME3-NEXT: lu52i.d $t0, $t0, 1092 + +@@ -123,8 +123,8 @@ + ## %pc64_hi12 = 0x444 = 1092 + # RUN: ld.lld %t/extreme.o --section-start=.rodata=0x444aaaaa34567111 --section-start=.text=0x0000000012345678 -o %t/extreme4 + # RUN: llvm-objdump -d --no-show-raw-insn 
%t/extreme4 | FileCheck %s --check-prefix=EXTREME4 +-# EXTREME4: addi.d $t0, $zero, 273 +-# EXTREME4-NEXT: pcalau12i $t1, 139810 ++# EXTREME4: pcalau12i $t1, 139810 ++# EXTREME4-NEXT: addi.d $t0, $zero, 273 + # EXTREME4-NEXT: lu32i.d $t0, -349526 + # EXTREME4-NEXT: lu52i.d $t0, $t0, 1092 + +@@ -135,8 +135,8 @@ + ## %pc64_hi12 = 0x444 = 1092 + # RUN: ld.lld %t/extreme.o --section-start=.rodata=0x444aaaaa34567888 --section-start=.text=0x0000000012345678 -o %t/extreme5 + # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme5 | FileCheck %s --check-prefix=EXTREME5 +-# EXTREME5: addi.d $t0, $zero, -1912 +-# EXTREME5-NEXT: pcalau12i $t1, 139811 ++# EXTREME5: pcalau12i $t1, 139811 ++# EXTREME5-NEXT: addi.d $t0, $zero, -1912 + # EXTREME5-NEXT: lu32i.d $t0, -349527 + # EXTREME5-NEXT: lu52i.d $t0, $t0, 1092 + +@@ -147,8 +147,8 @@ + ## %pc64_hi12 = 0x444 = 1092 + # RUN: ld.lld %t/extreme.o --section-start=.rodata=0x444aaaaaabcde111 --section-start=.text=0x0000000012345678 -o %t/extreme6 + # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme6 | FileCheck %s --check-prefix=EXTREME6 +-# EXTREME6: addi.d $t0, $zero, 273 +-# EXTREME6-NEXT: pcalau12i $t1, -419431 ++# EXTREME6: pcalau12i $t1, -419431 ++# EXTREME6-NEXT: addi.d $t0, $zero, 273 + # EXTREME6-NEXT: lu32i.d $t0, -349525 + # EXTREME6-NEXT: lu52i.d $t0, $t0, 1092 + +@@ -159,8 +159,8 @@ + ## %pc64_hi12 = 0x444 = 1092 + # RUN: ld.lld %t/extreme.o --section-start=.rodata=0x444aaaaaabcde888 --section-start=.text=0x0000000012345678 -o %t/extreme7 + # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme7 | FileCheck %s --check-prefix=EXTREME7 +-# EXTREME7: addi.d $t0, $zero, -1912 +-# EXTREME7-NEXT: pcalau12i $t1, -419430 ++# EXTREME7: pcalau12i $t1, -419430 ++# EXTREME7-NEXT: addi.d $t0, $zero, -1912 + # EXTREME7-NEXT: lu32i.d $t0, -349526 + # EXTREME7-NEXT: lu52i.d $t0, $t0, 1092 + +@@ -171,8 +171,8 @@ + ## %pc64_hi12 = 0xbbb = -1093 + # RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbb3333334567111 --section-start=.text=0x0000000012345678 -o %t/extreme8 + # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme8 | FileCheck %s --check-prefix=EXTREME8 +-# EXTREME8: addi.d $t0, $zero, 273 +-# EXTREME8-NEXT: pcalau12i $t1, 139810 ++# EXTREME8: pcalau12i $t1, 139810 ++# EXTREME8-NEXT: addi.d $t0, $zero, 273 + # EXTREME8-NEXT: lu32i.d $t0, 209715 + # EXTREME8-NEXT: lu52i.d $t0, $t0, -1093 + +@@ -183,8 +183,8 @@ + ## %pc64_hi12 = 0xbbb = -1093 + # RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbb3333334567888 --section-start=.text=0x0000000012345678 -o %t/extreme9 + # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme9 | FileCheck %s --check-prefix=EXTREME9 +-# EXTREME9: addi.d $t0, $zero, -1912 +-# EXTREME9-NEXT: pcalau12i $t1, 139811 ++# EXTREME9: pcalau12i $t1, 139811 ++# EXTREME9-NEXT: addi.d $t0, $zero, -1912 + # EXTREME9-NEXT: lu32i.d $t0, 209714 + # EXTREME9-NEXT: lu52i.d $t0, $t0, -1093 + +@@ -195,8 +195,8 @@ + ## %pc64_hi12 = 0xbbb = -1093 + # RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbb33333abcde111 --section-start=.text=0x0000000012345678 -o %t/extreme10 + # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme10 | FileCheck %s --check-prefix=EXTREME10 +-# EXTREME10: addi.d $t0, $zero, 273 +-# EXTREME10-NEXT: pcalau12i $t1, -419431 ++# EXTREME10: pcalau12i $t1, -419431 ++# EXTREME10-NEXT: addi.d $t0, $zero, 273 + # EXTREME10-NEXT: lu32i.d $t0, 209716 + # EXTREME10-NEXT: lu52i.d $t0, $t0, -1093 + +@@ -207,8 +207,8 @@ + ## %pc64_hi12 = 0xbbb = -1093 + # RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbb33333abcde888 
--section-start=.text=0x0000000012345678 -o %t/extreme11 + # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme11 | FileCheck %s --check-prefix=EXTREME11 +-# EXTREME11: addi.d $t0, $zero, -1912 +-# EXTREME11-NEXT: pcalau12i $t1, -419430 ++# EXTREME11: pcalau12i $t1, -419430 ++# EXTREME11-NEXT: addi.d $t0, $zero, -1912 + # EXTREME11-NEXT: lu32i.d $t0, 209715 + # EXTREME11-NEXT: lu52i.d $t0, $t0, -1093 + +@@ -219,8 +219,8 @@ + ## %pc64_hi12 = 0xbbb = -1093 + # RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbbaaaaa34567111 --section-start=.text=0x0000000012345678 -o %t/extreme12 + # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme12 | FileCheck %s --check-prefix=EXTREME12 +-# EXTREME12: addi.d $t0, $zero, 273 +-# EXTREME12-NEXT: pcalau12i $t1, 139810 ++# EXTREME12: pcalau12i $t1, 139810 ++# EXTREME12-NEXT: addi.d $t0, $zero, 273 + # EXTREME12-NEXT: lu32i.d $t0, -349526 + # EXTREME12-NEXT: lu52i.d $t0, $t0, -1093 + +@@ -231,8 +231,8 @@ + ## %pc64_hi12 = 0xbbb = -1093 + # RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbbaaaaa34567888 --section-start=.text=0x0000000012345678 -o %t/extreme13 + # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme13 | FileCheck %s --check-prefix=EXTREME13 +-# EXTREME13: addi.d $t0, $zero, -1912 +-# EXTREME13-NEXT: pcalau12i $t1, 139811 ++# EXTREME13: pcalau12i $t1, 139811 ++# EXTREME13-NEXT: addi.d $t0, $zero, -1912 + # EXTREME13-NEXT: lu32i.d $t0, -349527 + # EXTREME13-NEXT: lu52i.d $t0, $t0, -1093 + +@@ -243,8 +243,8 @@ + ## %pc64_hi12 = 0xbbb = -1093 + # RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbbaaaaaabcde111 --section-start=.text=0x0000000012345678 -o %t/extreme14 + # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme14 | FileCheck %s --check-prefix=EXTREME14 +-# EXTREME14: addi.d $t0, $zero, 273 +-# EXTREME14-NEXT: pcalau12i $t1, -419431 ++# EXTREME14: pcalau12i $t1, -419431 ++# EXTREME14-NEXT: addi.d $t0, $zero, 273 + # EXTREME14-NEXT: lu32i.d $t0, -349525 + # EXTREME14-NEXT: lu52i.d $t0, $t0, -1093 + +@@ -255,36 +255,47 @@ + ## %pc64_hi12 = 0xbbb = -1093 + # RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbbaaaaaabcde888 --section-start=.text=0x0000000012345678 -o %t/extreme15 + # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme15 | FileCheck %s --check-prefix=EXTREME15 +-# EXTREME15: addi.d $t0, $zero, -1912 +-# EXTREME15-NEXT: pcalau12i $t1, -419430 ++# EXTREME15: pcalau12i $t1, -419430 ++# EXTREME15-NEXT: addi.d $t0, $zero, -1912 + # EXTREME15-NEXT: lu32i.d $t0, -349526 + # EXTREME15-NEXT: lu52i.d $t0, $t0, -1093 + +-## FIXME: Correct %pc64_lo20 should be 0xfffff (-1) and %pc64_hi12 should be 0xfff (-1), but current values are: +-## page delta = 0x0000000000000000, page offset = 0x888 ++## page delta = 0xffffffff00000000, page offset = 0x888 + ## %pc_lo12 = 0x888 = -1912 + ## %pc_hi20 = 0x00000 = 0 +-## %pc64_lo20 = 0x00000 = 0 +-## %pc64_hi12 = 0x00000 = 0 ++## %pc64_lo20 = 0xfffff = -1 ++## %pc64_hi12 = 0xfff = -1 + # RUN: ld.lld %t/extreme.o --section-start=.rodata=0x0000000012344888 --section-start=.text=0x0000000012345678 -o %t/extreme16 + # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme16 | FileCheck %s --check-prefix=EXTREME16 +-# EXTREME16: addi.d $t0, $zero, -1912 +-# EXTREME16-NEXT: pcalau12i $t1, 0 +-# EXTREME16-NEXT: lu32i.d $t0, 0 +-# EXTREME16-NEXT: lu52i.d $t0, $t0, 0 ++# EXTREME16: pcalau12i $t1, 0 ++# EXTREME16-NEXT: addi.d $t0, $zero, -1912 ++# EXTREME16-NEXT: lu32i.d $t0, -1 ++# EXTREME16-NEXT: lu52i.d $t0, $t0, -1 + +-## FIXME: Correct %pc64_lo20 should be 0x00000 (0) and %pc64_hi12 should be 0x000 
(0), but current values are: +-## page delta = 0xffffffff80000000, page offset = 0x888 ++## page delta = 0x0000000080000000, page offset = 0x888 + ## %pc_lo12 = 0x888 = -1912 + ## %pc_hi20 = 0x80000 = -524288 +-## %pc64_lo20 = 0xfffff = -1 +-## %pc64_hi12 = 0xfff = -1 ++## %pc64_lo20 = 0xfffff = 0 ++## %pc64_hi12 = 0xfff = 0 + # RUN: ld.lld %t/extreme.o --section-start=.rodata=0x000071238ffff888 --section-start=.text=0x0000712310000678 -o %t/extreme17 + # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme17 | FileCheck %s --check-prefix=EXTREME17 +-# EXTREME17: addi.d $t0, $zero, -1912 +-# EXTREME17-NEXT: pcalau12i $t1, -524288 +-# EXTREME17-NEXT: lu32i.d $t0, -1 +-# EXTREME17-NEXT: lu52i.d $t0, $t0, -1 ++# EXTREME17: pcalau12i $t1, -524288 ++# EXTREME17-NEXT: addi.d $t0, $zero, -1912 ++# EXTREME17-NEXT: lu32i.d $t0, 0 ++# EXTREME17-NEXT: lu52i.d $t0, $t0, 0 ++ ++## A case that pcalau12i, lu32i.d and lu52i.d are in different pages. ++## page delta = 0x0000000080000000, page offset = 0x123 ++## %pc_lo12 = 0x111 = 273 ++## %pc_hi20 = 0x80000 = -524288 ++## %pc64_lo20 = 0x00001 = 1 ++## %pc64_hi12 = 0x000 = 0 ++# RUN: ld.lld %t/extreme.o --section-start=.rodata=0x80000111 --section-start=.text=0xff8 -o %t/extreme18 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme18 | FileCheck %s --check-prefix=EXTREME18 ++# EXTREME18: pcalau12i $t1, -524288 ++# EXTREME18-NEXT: addi.d $t0, $zero, 273 ++# EXTREME18-NEXT: lu32i.d $t0, 1 ++# EXTREME18-NEXT: lu52i.d $t0, $t0, 0 + + #--- a.s + .rodata +@@ -303,7 +314,7 @@ x: + .text + .global _start + _start: +- addi.d $t0, $zero, %pc_lo12(x) + pcalau12i $t1, %pc_hi20(x) ++ addi.d $t0, $zero, %pc_lo12(x) + lu32i.d $t0, %pc64_lo20(x) + lu52i.d $t0, $t0, %pc64_hi12(x) +-- +2.20.1 + diff --git a/0014-Clang-LoongArch-Add-ABI-implementation-of-passing-ve.patch b/0014-Clang-LoongArch-Add-ABI-implementation-of-passing-ve.patch new file mode 100644 index 0000000..50cafde --- /dev/null +++ b/0014-Clang-LoongArch-Add-ABI-implementation-of-passing-ve.patch @@ -0,0 +1,33 @@ +From 9df32c3cc1929b8d09f3e5fc07819f2fdef3ae6d Mon Sep 17 00:00:00 2001 +From: licongtian +Date: Wed, 25 Oct 2023 17:35:32 +0800 +Subject: [PATCH 14/42] [Clang][LoongArch] Add ABI implementation of passing + vectors + +(cherry picked from commit eb49b86f5a9b54b0e3c37024334a3c6f6ca88e14) + +--- + clang/lib/CodeGen/Targets/LoongArch.cpp | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/clang/lib/CodeGen/Targets/LoongArch.cpp b/clang/lib/CodeGen/Targets/LoongArch.cpp +index 7483bf6d6d1e..26c68c3583b2 100644 +--- a/clang/lib/CodeGen/Targets/LoongArch.cpp ++++ b/clang/lib/CodeGen/Targets/LoongArch.cpp +@@ -321,6 +321,13 @@ ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, + return ABIArgInfo::getDirect(); + } + ++ // Pass 128-bit/256-bit vector values via vector registers directly. ++ if (Ty->isVectorType() && (((getContext().getTypeSize(Ty) == 128) && ++ (getTarget().hasFeature("lsx"))) || ++ ((getContext().getTypeSize(Ty) == 256) && ++ getTarget().hasFeature("lasx")))) ++ return ABIArgInfo::getDirect(); ++ + // Complex types for the *f or *d ABI must be passed directly rather than + // using CoerceAndExpand. 
+ if (IsFixed && Ty->isComplexType() && FRLen && FARsLeft >= 2) { +-- +2.20.1 + diff --git a/0014-Driver-Default-LoongArch-to-fno-direct-access-extern.patch b/0014-Driver-Default-LoongArch-to-fno-direct-access-extern.patch new file mode 100644 index 0000000..d7aec3f --- /dev/null +++ b/0014-Driver-Default-LoongArch-to-fno-direct-access-extern.patch @@ -0,0 +1,64 @@ +From 896bdd6b1b7bbfc0b157ad1aeee724973b58bb62 Mon Sep 17 00:00:00 2001 +From: Fangrui Song +Date: Tue, 14 Nov 2023 00:43:40 -0800 +Subject: [PATCH 14/27] [Driver] Default LoongArch to + -fno-direct-access-external-data for non-PIC (#72221) + +For -fno-pic, if an extern variable is defined in a DSO, a copy +relocation will be needed. However, loongarch*-linux does not and will +not support copy relocations. + +Change Driver to default to -fno-direct-access-external-data for +LoongArch && non-PIC. +Keep Frontend conditions unchanged (-fdirect-access-external-data || +-fno-direct-access-external-data && PIC>0 => direct access). + +Fix #71645 + +(cherry picked from commit 47eeee297775347cbdb7624d6a766c2a3eec4a59) +--- + clang/lib/Driver/ToolChains/Clang.cpp | 7 ++++++- + clang/test/Driver/fdirect-access-external-data.c | 6 ++++++ + 2 files changed, 12 insertions(+), 1 deletion(-) + +diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp +index 37a07b8f224d..777e21af98df 100644 +--- a/clang/lib/Driver/ToolChains/Clang.cpp ++++ b/clang/lib/Driver/ToolChains/Clang.cpp +@@ -5595,10 +5595,15 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, + // defaults to -fno-direct-access-external-data. Pass the option if different + // from the default. + if (Arg *A = Args.getLastArg(options::OPT_fdirect_access_external_data, +- options::OPT_fno_direct_access_external_data)) ++ options::OPT_fno_direct_access_external_data)) { + if (A->getOption().matches(options::OPT_fdirect_access_external_data) != + (PICLevel == 0)) + A->render(Args, CmdArgs); ++ } else if (PICLevel == 0 && Triple.isLoongArch()) { ++ // Some targets default to -fno-direct-access-external-data even for ++ // -fno-pic. ++ CmdArgs.push_back("-fno-direct-access-external-data"); ++ } + + if (Args.hasFlag(options::OPT_fno_plt, options::OPT_fplt, false)) { + CmdArgs.push_back("-fno-plt"); +diff --git a/clang/test/Driver/fdirect-access-external-data.c b/clang/test/Driver/fdirect-access-external-data.c +index f132b1b088af..a6da776e6977 100644 +--- a/clang/test/Driver/fdirect-access-external-data.c ++++ b/clang/test/Driver/fdirect-access-external-data.c +@@ -9,6 +9,12 @@ + // RUN: %clang -### -c -target aarch64 %s -fpic 2>&1 | FileCheck %s --check-prefix=DEFAULT + // RUN: %clang -### -c -target aarch64 %s -fpic -fdirect-access-external-data 2>&1 | FileCheck %s --check-prefix=DIRECT + ++/// loongarch* targets default to -fno-direct-access-external-data even for -fno-pic. 
++// RUN: %clang -### -c --target=loongarch64 -fno-pic %s 2>&1 | FileCheck %s --check-prefix=INDIRECT ++// RUN: %clang -### -c --target=loongarch64 -fpie %s 2>&1 | FileCheck %s --check-prefix=DEFAULT ++// RUN: %clang -### -c --target=loongarch32 -fno-pic -fdirect-access-external-data %s 2>&1 | FileCheck %s --check-prefix=DEFAULT ++// RUN: %clang -### -c --target=loongarch32 -fpie -fdirect-access-external-data %s 2>&1 | FileCheck %s --check-prefix=DIRECT ++ + // DEFAULT-NOT: direct-access-external-data" + // DIRECT: "-fdirect-access-external-data" + // INDIRECT: "-fno-direct-access-external-data" +-- +2.20.1 + diff --git a/0014-LoongArch-clang-Add-support-for-option-msimd-and-mac.patch b/0014-LoongArch-clang-Add-support-for-option-msimd-and-mac.patch new file mode 100644 index 0000000..fa56fc2 --- /dev/null +++ b/0014-LoongArch-clang-Add-support-for-option-msimd-and-mac.patch @@ -0,0 +1,269 @@ +From 75f907bc6a8ad2f652099aac6221aa2332c8b141 Mon Sep 17 00:00:00 2001 +From: Zhaoxin Yang +Date: Tue, 9 Jul 2024 14:13:19 +0800 +Subject: [PATCH 14/23] [LoongArch][clang] Add support for option `-msimd=` and + macro `__loongarch_simd_width`. (#97984) + +(cherry picked from commit 626c7ce33f850831949e4e724016ddbff3a34990) +--- + .../clang/Basic/DiagnosticDriverKinds.td | 2 + + clang/include/clang/Driver/Options.td | 3 + + clang/lib/Basic/Targets/LoongArch.cpp | 8 +- + .../lib/Driver/ToolChains/Arch/LoongArch.cpp | 29 ++++ + clang/test/Driver/loongarch-msimd.c | 129 ++++++++++++++++++ + clang/test/Preprocessor/init-loongarch.c | 3 + + 6 files changed, 172 insertions(+), 2 deletions(-) + create mode 100644 clang/test/Driver/loongarch-msimd.c + +diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td +index 8c751f2c4bda..29f932d21098 100644 +--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td ++++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td +@@ -738,6 +738,8 @@ def err_drv_loongarch_wrong_fpu_width_for_lasx : Error< + "wrong fpu width; LASX depends on 64-bit FPU.">; + def err_drv_loongarch_invalid_simd_option_combination : Error< + "invalid option combination; LASX depends on LSX.">; ++def err_drv_loongarch_invalid_msimd_EQ : Error< ++ "invalid argument '%0' to -msimd=; must be one of: none, lsx, lasx">; + + def err_drv_expand_response_file : Error< + "failed to expand response file: %0">; +diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td +index c94979b4dc34..6f72b19f8c90 100644 +--- a/clang/include/clang/Driver/Options.td ++++ b/clang/include/clang/Driver/Options.td +@@ -4200,6 +4200,9 @@ def mlasx : Flag<["-"], "mlasx">, Group, + HelpText<"Enable Loongson Advanced SIMD Extension (LASX).">; + def mno_lasx : Flag<["-"], "mno-lasx">, Group, + HelpText<"Disable Loongson Advanced SIMD Extension (LASX).">; ++def msimd_EQ : Joined<["-"], "msimd=">, Group, ++ Flags<[TargetSpecific]>, ++ HelpText<"Select the SIMD extension(s) to be enabled in LoongArch either 'none', 'lsx', 'lasx'.">; + def mnop_mcount : Flag<["-"], "mnop-mcount">, HelpText<"Generate mcount/__fentry__ calls as nops. 
To activate they need to be patched in.">, + Flags<[CC1Option]>, Group, + MarshallingInfoFlag>; +diff --git a/clang/lib/Basic/Targets/LoongArch.cpp b/clang/lib/Basic/Targets/LoongArch.cpp +index 88537989a051..913404240916 100644 +--- a/clang/lib/Basic/Targets/LoongArch.cpp ++++ b/clang/lib/Basic/Targets/LoongArch.cpp +@@ -208,10 +208,14 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts, + TuneCPU = ArchName; + Builder.defineMacro("__loongarch_tune", Twine('"') + TuneCPU + Twine('"')); + +- if (HasFeatureLSX) ++ if (HasFeatureLASX) { ++ Builder.defineMacro("__loongarch_simd_width", "256"); + Builder.defineMacro("__loongarch_sx", Twine(1)); +- if (HasFeatureLASX) + Builder.defineMacro("__loongarch_asx", Twine(1)); ++ } else if (HasFeatureLSX) { ++ Builder.defineMacro("__loongarch_simd_width", "128"); ++ Builder.defineMacro("__loongarch_sx", Twine(1)); ++ } + + StringRef ABI = getABI(); + if (ABI == "lp64d" || ABI == "lp64f" || ABI == "lp64s") +diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp +index 31153a67ad28..2d9c3f810a06 100644 +--- a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp ++++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp +@@ -207,6 +207,35 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D, + } else /*-mno-lasx*/ + Features.push_back("-lasx"); + } ++ ++ // Select lsx/lasx feature determined by -msimd=. ++ // Option -msimd= has lower priority than -m[no-]lsx and -m[no-]lasx. ++ if (const Arg *A = Args.getLastArg(options::OPT_msimd_EQ)) { ++ StringRef MSIMD = A->getValue(); ++ if (MSIMD == "lsx") { ++ // Option -msimd=lsx depends on 64-bit FPU. ++ // -m*-float and -mfpu=none/0/32 conflict with -mlsx. ++ if (llvm::find(Features, "-d") != Features.end()) ++ D.Diag(diag::err_drv_loongarch_wrong_fpu_width) << /*LSX*/ 0; ++ // The previous option does not contain feature -lsx. ++ else if (llvm::find(Features, "-lsx") == Features.end()) ++ Features.push_back("+lsx"); ++ } else if (MSIMD == "lasx") { ++ // Option -msimd=lasx depends on 64-bit FPU and LSX. ++ // -m*-float and -mfpu=none/0/32 conflict with -mlsx. ++ if (llvm::find(Features, "-d") != Features.end()) ++ D.Diag(diag::err_drv_loongarch_wrong_fpu_width) << /*LASX*/ 1; ++ else if (llvm::find(Features, "-lsx") != Features.end()) ++ D.Diag(diag::err_drv_loongarch_invalid_simd_option_combination); ++ // The previous option does not contain feature -lasx. ++ else if (llvm::find(Features, "-lasx") == Features.end()) { ++ Features.push_back("+lsx"); ++ Features.push_back("+lasx"); ++ } ++ } else if (MSIMD != "none") { ++ D.Diag(diag::err_drv_loongarch_invalid_msimd_EQ) << MSIMD; ++ } ++ } + } + + std::string loongarch::postProcessTargetCPUString(const std::string &CPU, +diff --git a/clang/test/Driver/loongarch-msimd.c b/clang/test/Driver/loongarch-msimd.c +new file mode 100644 +index 000000000000..984f3e8bf2bf +--- /dev/null ++++ b/clang/test/Driver/loongarch-msimd.c +@@ -0,0 +1,129 @@ ++/// Test -msimd options. 
++ ++/// COM: -msimd=none ++// RUN: %clang --target=loongarch64 -mlasx -msimd=none -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=LSX,LASX ++// RUN: %clang --target=loongarch64 -mlasx -mlsx -msimd=none -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=LSX,LASX ++ ++// RUN: %clang --target=loongarch64 -msimd=none -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX ++// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -msimd=none -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX ++// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mlsx -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX ++// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX ++// RUN: %clang --target=loongarch64 -mlsx -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX ++// RUN: %clang --target=loongarch64 -mno-lasx -msimd=none -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX ++// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX ++// RUN: %clang --target=loongarch64 -mno-lasx -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX ++// RUN: %clang --target=loongarch64 -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX ++ ++// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mlsx -msimd=none -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=LSX,NOLASX ++// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -msimd=none -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=LSX,NOLASX ++// RUN: %clang --target=loongarch64 -mlsx -msimd=none -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=LSX,NOLASX ++ ++ ++/// COM: -msimd=lsx ++// RUN: %clang --target=loongarch64 -mlasx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=LSX,LASX ++// RUN: %clang --target=loongarch64 -mlasx -mlsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=LSX,LASX ++ ++// RUN: %clang 
--target=loongarch64 -mlasx -mno-lasx -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX ++// RUN: %clang --target=loongarch64 -mlsx -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX ++// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX ++// RUN: %clang --target=loongarch64 -mno-lasx -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX ++// RUN: %clang --target=loongarch64 -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX ++// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mlsx -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX ++ ++// RUN: %clang --target=loongarch64 -msimd=lsx -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=LSX,NOLASX ++// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=LSX,NOLASX ++// RUN: %clang --target=loongarch64 -mlsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=LSX,NOLASX ++// RUN: %clang --target=loongarch64 -mno-lasx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=LSX,NOLASX ++// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=LSX,NOLASX ++ ++ ++/// COM: -msimd=lasx ++// RUN: %clang --target=loongarch64 -msimd=lasx -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=LSX,LASX ++// RUN: %clang --target=loongarch64 -mlasx -msimd=lasx -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=LSX,LASX ++// RUN: %clang --target=loongarch64 -mlasx -mlsx -msimd=lasx -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=LSX,LASX ++// RUN: %clang --target=loongarch64 -mlsx -msimd=lasx -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=LSX,LASX ++ ++// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -msimd=lasx -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX ++// RUN: %clang --target=loongarch64 -mno-lasx -msimd=lasx -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep 
-o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX ++ ++// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mlsx -msimd=lasx -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=LSX,NOLASX ++// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -msimd=lasx -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=LSX,NOLASX ++// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mlsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ ++// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ ++// RUN: FileCheck %s --check-prefixes=LSX,NOLASX ++ ++ ++// LSX: "-target-feature" "+lsx" ++// LASX: "-target-feature" "+lasx" ++// NOLSX-NOT: "-target-feature" "+lsx" ++// NOLASX-NOT: "-target-feature" "+lasx" +diff --git a/clang/test/Preprocessor/init-loongarch.c b/clang/test/Preprocessor/init-loongarch.c +index e235a7283021..154ad82e0f8c 100644 +--- a/clang/test/Preprocessor/init-loongarch.c ++++ b/clang/test/Preprocessor/init-loongarch.c +@@ -817,6 +817,7 @@ + // RUN: %clang --target=loongarch64 -mno-lasx -mlsx -x c -E -dM %s -o - \ + // RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s + // MLSX-NOT: #define __loongarch_asx ++// MLSX: #define __loongarch_simd_width 128 + // MLSX: #define __loongarch_sx 1 + + // RUN: %clang --target=loongarch64 -mlasx -x c -E -dM %s -o - \ +@@ -828,6 +829,7 @@ + // RUN: %clang --target=loongarch64 -mlasx -mlsx -x c -E -dM %s -o - \ + // RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s + // MLASX: #define __loongarch_asx 1 ++// MLASX: #define __loongarch_simd_width 256 + // MLASX: #define __loongarch_sx 1 + + // RUN: %clang --target=loongarch64 -mno-lsx -x c -E -dM %s -o - \ +@@ -841,4 +843,5 @@ + // RUN: %clang --target=loongarch64 -mno-lasx -x c -E -dM %s -o - \ + // RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s + // MNO-LSX-NOT: #define __loongarch_asx ++// MNO-LSX-NOT: #define __loongarch_simd_width + // MNO-LSX-NOT: #define __loongarch_sx +-- +2.20.1 + diff --git a/0010-Backport-Mips-MC-AttemptToFoldSymbolOffsetDifference-revert-isMicroMips-special-case.patch b/0014-Mips-MC-AttemptToFoldSymbolOffsetDifference-revert-i.patch similarity index 99% rename from 0010-Backport-Mips-MC-AttemptToFoldSymbolOffsetDifference-revert-isMicroMips-special-case.patch rename to 0014-Mips-MC-AttemptToFoldSymbolOffsetDifference-revert-i.patch index 1d370ee..d2e7643 100644 --- a/0010-Backport-Mips-MC-AttemptToFoldSymbolOffsetDifference-revert-isMicroMips-special-case.patch +++ b/0014-Mips-MC-AttemptToFoldSymbolOffsetDifference-revert-i.patch @@ -37,7 +37,6 @@ which is needed to proper support R_RISCV_SET_ULEB128/R_RISCV_SUB_ULEB128. 
Differential Revision: https://reviews.llvm.org/D157655 (cherry picked from commit 4c89277095ee7cda3d20e0f5f18b384212069778) -Change-Id: Iedd73e0c61856c30fde442309fc16d4327829f1a --- llvm/lib/MC/MCExpr.cpp | 5 ----- llvm/test/CodeGen/Mips/micromips-b-range.ll | 8 ++++---- diff --git a/0015-Clang-LoongArch-Support-the-builtin-functions-for-LS.patch b/0015-Clang-LoongArch-Support-the-builtin-functions-for-LS.patch new file mode 100644 index 0000000..81cf1a4 --- /dev/null +++ b/0015-Clang-LoongArch-Support-the-builtin-functions-for-LS.patch @@ -0,0 +1,5093 @@ +From 91ff2e72026568ecca978506770f25633a5b536e Mon Sep 17 00:00:00 2001 +From: licongtian +Date: Wed, 25 Oct 2023 17:41:03 +0800 +Subject: [PATCH 15/42] [Clang][LoongArch] Support the builtin functions for + LSX + +This patch does the following work: +- Define the builtin functions for LSX +- Add the header file lsxintrin.h +- Add the immediate number range checking for LSX builtins + +(cherry picked from commit d6bfa3341181a80de6c8aede807fc1acc3ce8d9b) + +--- + .../include/clang/Basic/BuiltinsLoongArch.def | 43 +- + .../clang/Basic/BuiltinsLoongArchBase.def | 53 + + .../clang/Basic/BuiltinsLoongArchLSX.def | 953 +++++ + clang/lib/Headers/CMakeLists.txt | 1 + + clang/lib/Headers/lsxintrin.h | 3726 +++++++++++++++++ + clang/lib/Sema/SemaChecking.cpp | 229 +- + 6 files changed, 4965 insertions(+), 40 deletions(-) + create mode 100644 clang/include/clang/Basic/BuiltinsLoongArchBase.def + create mode 100644 clang/include/clang/Basic/BuiltinsLoongArchLSX.def + create mode 100644 clang/lib/Headers/lsxintrin.h + +diff --git a/clang/include/clang/Basic/BuiltinsLoongArch.def b/clang/include/clang/Basic/BuiltinsLoongArch.def +index 20510e18fe58..9ec19c31095a 100644 +--- a/clang/include/clang/Basic/BuiltinsLoongArch.def ++++ b/clang/include/clang/Basic/BuiltinsLoongArch.def +@@ -15,46 +15,11 @@ + # define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS) + #endif + +-// TODO: Support more builtins. +-TARGET_BUILTIN(__builtin_loongarch_cacop_d, "vWiUWiWi", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_cacop_w, "viUii", "nc", "32bit") +-TARGET_BUILTIN(__builtin_loongarch_dbar, "vIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_ibar, "vIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_movfcsr2gr, "UiIUi", "nc", "f") +-TARGET_BUILTIN(__builtin_loongarch_movgr2fcsr, "vIUiUi", "nc", "f") +-TARGET_BUILTIN(__builtin_loongarch_break, "vIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_syscall, "vIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_cpucfg, "UiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_asrtle_d, "vWiWi", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_asrtgt_d, "vWiWi", "nc", "64bit") ++// Definition of LoongArch basic builtins. 
++#include "clang/Basic/BuiltinsLoongArchBase.def" + +-TARGET_BUILTIN(__builtin_loongarch_crc_w_b_w, "iii", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_crc_w_h_w, "iii", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_crc_w_w_w, "iii", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_crc_w_d_w, "iWii", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_crcc_w_b_w, "iii", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_crcc_w_h_w, "iii", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_crcc_w_w_w, "iii", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_crcc_w_d_w, "iWii", "nc", "64bit") +- +-TARGET_BUILTIN(__builtin_loongarch_csrrd_w, "UiIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_csrrd_d, "UWiIUi", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_csrwr_w, "UiUiIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_csrwr_d, "UWiUWiIUi", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_csrxchg_w, "UiUiUiIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_csrxchg_d, "UWiUWiUWiIUi", "nc", "64bit") +- +-TARGET_BUILTIN(__builtin_loongarch_iocsrrd_b, "UiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_iocsrrd_h, "UiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_iocsrrd_w, "UiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_iocsrrd_d, "UWiUi", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_iocsrwr_b, "vUiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_iocsrwr_h, "vUiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_iocsrwr_w, "vUiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_iocsrwr_d, "vUWiUi", "nc", "64bit") +- +-TARGET_BUILTIN(__builtin_loongarch_lddir_d, "WiWiIUWi", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_ldpte_d, "vWiIUWi", "nc", "64bit") ++// Definition of LSX builtins. ++#include "clang/Basic/BuiltinsLoongArchLSX.def" + + #undef BUILTIN + #undef TARGET_BUILTIN +diff --git a/clang/include/clang/Basic/BuiltinsLoongArchBase.def b/clang/include/clang/Basic/BuiltinsLoongArchBase.def +new file mode 100644 +index 000000000000..cbb239223aae +--- /dev/null ++++ b/clang/include/clang/Basic/BuiltinsLoongArchBase.def +@@ -0,0 +1,53 @@ ++//============------------ BuiltinsLoongArchBase.def -------------*- C++ -*-==// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines the LoongArch-specific basic builtin function database. ++// Users of this file must define the BUILTIN macro to make use of this ++// information. 
++// ++//===----------------------------------------------------------------------===// ++ ++TARGET_BUILTIN(__builtin_loongarch_cacop_d, "vWiUWiWi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_cacop_w, "viUii", "nc", "32bit") ++TARGET_BUILTIN(__builtin_loongarch_dbar, "vIUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_ibar, "vIUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_movfcsr2gr, "UiIUi", "nc", "f") ++TARGET_BUILTIN(__builtin_loongarch_movgr2fcsr, "vIUiUi", "nc", "f") ++TARGET_BUILTIN(__builtin_loongarch_break, "vIUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_syscall, "vIUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_cpucfg, "UiUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_asrtle_d, "vWiWi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_asrtgt_d, "vWiWi", "nc", "64bit") ++ ++TARGET_BUILTIN(__builtin_loongarch_crc_w_b_w, "iii", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_crc_w_h_w, "iii", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_crc_w_w_w, "iii", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_crc_w_d_w, "iWii", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_crcc_w_b_w, "iii", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_crcc_w_h_w, "iii", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_crcc_w_w_w, "iii", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_crcc_w_d_w, "iWii", "nc", "64bit") ++ ++TARGET_BUILTIN(__builtin_loongarch_csrrd_w, "UiIUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_csrrd_d, "UWiIUi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_csrwr_w, "UiUiIUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_csrwr_d, "UWiUWiIUi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_csrxchg_w, "UiUiUiIUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_csrxchg_d, "UWiUWiUWiIUi", "nc", "64bit") ++ ++TARGET_BUILTIN(__builtin_loongarch_iocsrrd_b, "UiUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_iocsrrd_h, "UiUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_iocsrrd_w, "UiUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_iocsrrd_d, "UWiUi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_iocsrwr_b, "vUiUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_iocsrwr_h, "vUiUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_iocsrwr_w, "vUiUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_iocsrwr_d, "vUWiUi", "nc", "64bit") ++ ++TARGET_BUILTIN(__builtin_loongarch_lddir_d, "WiWiIUWi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_ldpte_d, "vWiIUWi", "nc", "64bit") +diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLSX.def b/clang/include/clang/Basic/BuiltinsLoongArchLSX.def +new file mode 100644 +index 000000000000..8e6aec886c50 +--- /dev/null ++++ b/clang/include/clang/Basic/BuiltinsLoongArchLSX.def +@@ -0,0 +1,953 @@ ++//=============------------- BuiltinsLoongArchLSX.def --------------- C++ -*-=// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines the LoongArch-specific LSX builtin function database. ++// Users of this file must define the BUILTIN macro to make use of this ++// information. 
++// ++//===----------------------------------------------------------------------===// ++ ++TARGET_BUILTIN(__builtin_lsx_vadd_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vadd_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vadd_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vadd_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vadd_q, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsub_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsub_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsub_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsub_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsub_q, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vaddi_bu, "V16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddi_hu, "V8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddi_wu, "V4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddi_du, "V2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsubi_bu, "V16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubi_hu, "V8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubi_wu, "V4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubi_du, "V2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vneg_b, "V16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vneg_h, "V8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vneg_w, "V4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vneg_d, "V2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsadd_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsadd_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsadd_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsadd_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsadd_bu, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsadd_hu, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsadd_wu, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsadd_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssub_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssub_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssub_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssub_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssub_bu, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssub_hu, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssub_wu, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssub_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vhaddw_h_b, "V8SsV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhaddw_w_h, "V4SiV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhaddw_d_w, "V2SLLiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhaddw_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vhaddw_hu_bu, "V8UsV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhaddw_wu_hu, "V4UiV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhaddw_du_wu, "V2ULLiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhaddw_qu_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vhsubw_h_b, "V8SsV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhsubw_w_h, "V4SiV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhsubw_d_w, 
"V2SLLiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhsubw_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vhsubw_hu_bu, "V8UsV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhsubw_wu_hu, "V4UiV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhsubw_du_wu, "V2ULLiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhsubw_qu_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vaddwev_h_b, "V8sV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwev_w_h, "V4SiV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwev_d_w, "V2LLiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwev_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vaddwod_h_b, "V8sV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwod_w_h, "V4SiV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwod_d_w, "V2LLiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwod_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsubwev_h_b, "V8sV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwev_w_h, "V4SiV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwev_d_w, "V2LLiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwev_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsubwod_h_b, "V8sV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwod_w_h, "V4SiV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwod_d_w, "V2LLiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwod_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vaddwev_h_bu, "V8sV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwev_w_hu, "V4SiV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwev_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwev_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vaddwod_h_bu, "V8sV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwod_w_hu, "V4SiV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwod_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwod_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsubwev_h_bu, "V8sV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwev_w_hu, "V4SiV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwev_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwev_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsubwod_h_bu, "V8sV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwod_w_hu, "V4SiV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwod_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwod_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vaddwev_h_bu_b, "V8sV16UcV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwev_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwev_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwev_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vaddwod_h_bu_b, "V8sV16UcV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwod_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwod_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwod_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vavg_b, "V16ScV16ScV16Sc", "nc", "lsx") 
++TARGET_BUILTIN(__builtin_lsx_vavg_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vavg_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vavg_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vavg_bu, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vavg_hu, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vavg_wu, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vavg_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vavgr_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vavgr_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vavgr_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vavgr_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vavgr_bu, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vavgr_hu, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vavgr_wu, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vavgr_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vabsd_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vabsd_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vabsd_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vabsd_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vabsd_bu, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vabsd_hu, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vabsd_wu, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vabsd_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vadda_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vadda_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vadda_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vadda_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmax_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmax_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmax_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmax_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmaxi_b, "V16ScV16ScIi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaxi_h, "V8SsV8SsIi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaxi_w, "V4SiV4SiIi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaxi_d, "V2SLLiV2SLLiIi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmax_bu, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmax_hu, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmax_wu, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmax_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmaxi_bu, "V16UcV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaxi_hu, "V8UsV8UsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaxi_wu, "V4UiV4UiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaxi_du, "V2ULLiV2ULLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmin_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmin_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmin_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmin_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmini_b, "V16ScV16ScIi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmini_h, "V8SsV8SsIi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmini_w, 
"V4SiV4SiIi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmini_d, "V2SLLiV2SLLiIi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmin_bu, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmin_hu, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmin_wu, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmin_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmini_bu, "V16UcV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmini_hu, "V8UsV8UsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmini_wu, "V4UiV4UiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmini_du, "V2ULLiV2ULLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmul_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmul_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmul_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmul_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmuh_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmuh_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmuh_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmuh_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmuh_bu, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmuh_hu, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmuh_wu, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmuh_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmulwev_h_b, "V8sV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwev_w_h, "V4SiV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwev_d_w, "V2LLiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwev_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmulwod_h_b, "V8sV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwod_w_h, "V4SiV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwod_d_w, "V2LLiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwod_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmulwev_h_bu, "V8sV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwev_w_hu, "V4SiV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwev_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwev_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmulwod_h_bu, "V8sV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwod_w_hu, "V4SiV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwod_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwod_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmulwev_h_bu_b, "V8sV16UcV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwev_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwev_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwev_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmulwod_h_bu_b, "V8sV16UcV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwod_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwod_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwod_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmadd_b, "V16ScV16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmadd_h, "V8SsV8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmadd_w, "V4SiV4SiV4SiV4Si", 
"nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmadd_d, "V2SLLiV2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmsub_b, "V16ScV16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmsub_h, "V8SsV8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmsub_w, "V4SiV4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmsub_d, "V2SLLiV2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_h_b, "V8sV8sV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_w_h, "V4SiV4SiV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_d_w, "V2LLiV2LLiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_q_d, "V2LLiV2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_h_b, "V8sV8sV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_w_h, "V4SiV4SiV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_d_w, "V2LLiV2LLiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_q_d, "V2LLiV2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_h_bu, "V8UsV8UsV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_w_hu, "V4UiV4UiV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_d_wu, "V2ULLiV2ULLiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_q_du, "V2ULLiV2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_h_bu, "V8UsV8UsV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_w_hu, "V4UiV4UiV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_d_wu, "V2ULLiV2ULLiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_q_du, "V2ULLiV2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_h_bu_b, "V8sV8sV16UcV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_w_hu_h, "V4SiV4SiV8UsV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_d_wu_w, "V2LLiV2LLiV4UiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_q_du_d, "V2LLiV2LLiV2ULLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_h_bu_b, "V8sV8sV16UcV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_w_hu_h, "V4SiV4SiV8UsV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_d_wu_w, "V2LLiV2LLiV4UiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_q_du_d, "V2LLiV2LLiV2ULLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vdiv_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vdiv_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vdiv_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vdiv_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vdiv_bu, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vdiv_hu, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vdiv_wu, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vdiv_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmod_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmod_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmod_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmod_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++ ++TARGET_BUILTIN(__builtin_lsx_vmod_bu, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmod_hu, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmod_wu, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmod_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsat_b, 
"V16ScV16ScIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsat_h, "V8SsV8SsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsat_w, "V4SiV4SiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsat_d, "V2SLLiV2SLLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsat_bu, "V16UcV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsat_hu, "V8UsV8UsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsat_wu, "V4UiV4UiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsat_du, "V2ULLiV2ULLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vexth_h_b, "V8sV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vexth_w_h, "V4SiV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vexth_d_w, "V2LLiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vexth_q_d, "V2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vexth_hu_bu, "V8UsV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vexth_wu_hu, "V4UiV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vexth_du_wu, "V2ULLiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vexth_qu_du, "V2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsigncov_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsigncov_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsigncov_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsigncov_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmskltz_b, "V16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmskltz_h, "V8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmskltz_w, "V4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmskltz_d, "V2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmskgez_b, "V16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmsknz_b, "V8sV8s", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vldi, "V2LLiIi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vrepli_b, "V16cIi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vrepli_h, "V8sIi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vrepli_w, "V4iIi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vrepli_d, "V2LLiIi", "nc", "lsx") ++ ++ ++TARGET_BUILTIN(__builtin_lsx_vand_v, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vor_v, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vxor_v, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vnor_v, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vandn_v, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vorn_v, "V16ScV16ScV16Sc", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vandi_b, "V16UcV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vori_b, "V16UcV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vxori_b, "V16UcV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vnori_b, "V16UcV16UcIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsll_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsll_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsll_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsll_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vslli_b, "V16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslli_h, "V8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslli_w, "V4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslli_d, "V2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrl_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrl_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrl_w, "V4iV4iV4i", "nc", "lsx") 
++TARGET_BUILTIN(__builtin_lsx_vsrl_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrli_b, "V16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrli_h, "V8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrli_w, "V4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrli_d, "V2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsra_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsra_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsra_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsra_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrai_b, "V16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrai_h, "V8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrai_w, "V4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrai_d, "V2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vrotr_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vrotr_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vrotr_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vrotr_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vrotri_b, "V16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vrotri_h, "V8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vrotri_w, "V4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vrotri_d, "V2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsllwil_h_b, "V8sV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsllwil_w_h, "V4SiV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsllwil_d_w, "V2LLiV4SiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vextl_q_d, "V2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsllwil_hu_bu, "V8UsV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsllwil_wu_hu, "V4UiV8UsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsllwil_du_wu, "V2ULLiV4UiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vextl_qu_du, "V2LLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrlr_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlr_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlr_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlr_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrlri_b, "V16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlri_h, "V8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlri_w, "V4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlri_d, "V2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrar_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrar_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrar_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrar_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrari_b, "V16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrari_h, "V8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrari_w, "V4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrari_d, "V2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrln_b_h, "V16ScV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrln_h_w, "V8sV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrln_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsran_b_h, "V16ScV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsran_h_w, "V8sV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsran_w_d, 
"V4SiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrlni_b_h, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlni_h_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlni_w_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrani_b_h, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrani_h_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrani_w_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrani_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrlrn_b_h, "V16ScV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlrn_h_w, "V8sV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlrn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrarn_b_h, "V16ScV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrarn_h_w, "V8sV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrarn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrlrni_b_h, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlrni_h_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlrni_w_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlrni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrarni_b_h, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrarni_h_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrarni_w_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrarni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrln_b_h, "V16ScV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrln_h_w, "V8sV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrln_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssran_b_h, "V16ScV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssran_h_w, "V8sV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssran_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrln_bu_h, "V16UcV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrln_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrln_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssran_bu_h, "V16UcV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssran_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssran_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrlni_b_h, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlni_h_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlni_w_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrani_b_h, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrani_h_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrani_w_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrani_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrlrni_bu_h, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlrni_hu_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlrni_wu_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlrni_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrani_bu_h, 
"V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrani_hu_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrani_wu_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrani_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrlrn_b_h, "V16ScV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlrn_h_w, "V8sV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlrn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrarn_b_h, "V16ScV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrarn_h_w, "V8sV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrarn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrlrn_bu_h, "V16UcV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlrn_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlrn_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrarn_bu_h, "V16UcV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrarn_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrarn_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrlrni_b_h, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlrni_h_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlrni_w_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlrni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrarni_b_h, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrarni_h_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrarni_w_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrarni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrlni_bu_h, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlni_hu_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlni_wu_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlni_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrarni_bu_h, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrarni_hu_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrarni_wu_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrarni_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vclo_b, "V16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vclo_h, "V8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vclo_w, "V4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vclo_d, "V2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vclz_b, "V16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vclz_h, "V8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vclz_w, "V4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vclz_d, "V2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vpcnt_b, "V16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpcnt_h, "V8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpcnt_w, "V4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpcnt_d, "V2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vbitclr_b, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitclr_h, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitclr_w, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitclr_d, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vbitclri_b, 
"V16UcV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitclri_h, "V8UsV8UsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitclri_w, "V4UiV4UiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitclri_d, "V2ULLiV2ULLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vbitset_b, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitset_h, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitset_w, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitset_d, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vbitseti_b, "V16UcV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitseti_h, "V8UsV8UsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitseti_w, "V4UiV4UiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitseti_d, "V2ULLiV2ULLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vbitrev_b, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitrev_h, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitrev_w, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitrev_d, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vbitrevi_b, "V16UcV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitrevi_h, "V8UsV8UsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitrevi_w, "V4UiV4UiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitrevi_d, "V2ULLiV2ULLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfrstp_b, "V16ScV16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfrstp_h, "V8SsV8SsV8SsV8Ss", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfrstpi_b, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfrstpi_h, "V8sV8sV8sIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfadd_s, "V4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfadd_d, "V2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfsub_s, "V4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfsub_d, "V2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfmul_s, "V4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfmul_d, "V2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfdiv_s, "V4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfdiv_d, "V2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfmadd_s, "V4fV4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfmadd_d, "V2dV2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfmsub_s, "V4fV4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfmsub_d, "V2dV2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfnmadd_s, "V4fV4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfnmadd_d, "V2dV2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfnmsub_s, "V4fV4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfnmsub_d, "V2dV2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfmax_s, "V4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfmax_d, "V2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfmin_s, "V4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfmin_d, "V2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfmaxa_s, "V4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfmaxa_d, "V2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfmina_s, "V4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfmina_d, "V2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vflogb_s, "V4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vflogb_d, "V2dV2d", "nc", "lsx") ++ 
++TARGET_BUILTIN(__builtin_lsx_vfclass_s, "V4iV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfclass_d, "V2LLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfsqrt_s, "V4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfsqrt_d, "V2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfrecip_s, "V4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfrecip_d, "V2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfrsqrt_s, "V4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfrsqrt_d, "V2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcvtl_s_h, "V4fV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcvtl_d_s, "V2dV4f", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcvth_s_h, "V4fV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcvth_d_s, "V2dV4f", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcvt_h_s, "V8sV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcvt_s_d, "V4fV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfrintrne_s, "V4SiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfrintrne_d, "V2LLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfrintrz_s, "V4SiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfrintrz_d, "V2LLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfrintrp_s, "V4SiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfrintrp_d, "V2LLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfrintrm_s, "V4SiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfrintrm_d, "V2LLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfrint_s, "V4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfrint_d, "V2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftintrne_w_s, "V4SiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrne_l_d, "V2LLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftintrz_w_s, "V4SiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrz_l_d, "V2LLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftintrp_w_s, "V4SiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrp_l_d, "V2LLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftintrm_w_s, "V4SiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrm_l_d, "V2LLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftint_w_s, "V4SiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftint_l_d, "V2SLLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftintrz_wu_s, "V4UiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrz_lu_d, "V2ULLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftint_wu_s, "V4UiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftint_lu_d, "V2ULLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftintrne_w_d, "V4SiV2dV2d", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrz_w_d, "V4SiV2dV2d", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrp_w_d, "V4SiV2dV2d", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrm_w_d, "V4SiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftint_w_d, "V4SiV2dV2d", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrnel_l_s, "V2LLiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrneh_l_s, "V2LLiV4f", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftintrzl_l_s, "V2LLiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrzh_l_s, "V2LLiV4f", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftintrpl_l_s, "V2LLiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrph_l_s, "V2LLiV4f", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftintrml_l_s, "V2LLiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrmh_l_s, "V2LLiV4f", "nc", 
"lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftintl_l_s, "V2LLiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftinth_l_s, "V2LLiV4f", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vffint_s_w, "V4fV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vffint_d_l, "V2dV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vffint_s_wu, "V4fV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vffint_d_lu, "V2dV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vffintl_d_w, "V2dV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vffinth_d_w, "V2dV4Si", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vffint_s_l, "V4fV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vseq_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vseq_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vseq_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vseq_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vseqi_b, "V16ScV16ScISi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vseqi_h, "V8SsV8SsISi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vseqi_w, "V4SiV4SiISi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vseqi_d, "V2SLLiV2SLLiISi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsle_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsle_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsle_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsle_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vslei_b, "V16ScV16ScISi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslei_h, "V8SsV8SsISi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslei_w, "V4SiV4SiISi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslei_d, "V2SLLiV2SLLiISi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsle_bu, "V16ScV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsle_hu, "V8SsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsle_wu, "V4SiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsle_du, "V2SLLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vslei_bu, "V16ScV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslei_hu, "V8SsV8UsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslei_wu, "V4SiV4UiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslei_du, "V2SLLiV2ULLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vslt_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslt_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslt_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslt_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vslti_b, "V16ScV16ScISi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslti_h, "V8SsV8SsISi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslti_w, "V4SiV4SiISi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslti_d, "V2SLLiV2SLLiISi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vslt_bu, "V16ScV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslt_hu, "V8SsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslt_wu, "V4SiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslt_du, "V2SLLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vslti_bu, "V16ScV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslti_hu, "V8SsV8UsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslti_wu, "V4SiV4UiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslti_du, "V2SLLiV2ULLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_caf_s, "V4SiV4fV4f", "nc", 
"lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_caf_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cun_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cun_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_ceq_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_ceq_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cueq_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cueq_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_clt_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_clt_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cult_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cult_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cle_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cle_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cule_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cule_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cne_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cne_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cor_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cor_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cune_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cune_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_saf_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_saf_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sun_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sun_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_seq_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_seq_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sueq_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sueq_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_slt_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_slt_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sult_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sult_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sle_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sle_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sule_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sule_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sne_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sne_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sor_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sor_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sune_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sune_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vbitsel_v, "V16UcV16UcV16UcV16Uc", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vbitseli_b, "V16UcV16UcV16UcIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_b, "V16Sci", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_h, "V8Ssi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_w, "V4Sii", "nc", "lsx") 
++TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_d, "V2SLLiLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_b, "V16ScV16SciIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_h, "V8SsV8SsiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_w, "V4SiV4SiiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_d, "V2SLLiV2SLLiLLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vpickve2gr_b, "iV16ScIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickve2gr_h, "iV8SsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickve2gr_w, "iV4SiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickve2gr_d, "LLiV2SLLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vpickve2gr_bu, "iV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickve2gr_hu, "iV8UsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickve2gr_wu, "iV4UiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickve2gr_du, "LLiV2ULLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vreplve_b, "V16cV16cUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vreplve_h, "V8sV8sUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vreplve_w, "V4iV4iUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vreplve_d, "V2LLiV2LLiUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vreplvei_b, "V16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vreplvei_h, "V8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vreplvei_w, "V4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vreplvei_d, "V2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vbsll_v, "V16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbsrl_v, "V16cV16cIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vpackev_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpackev_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpackev_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpackev_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vpackod_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpackod_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpackod_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpackod_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vpickev_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickev_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickev_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickev_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vpickod_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickod_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickod_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickod_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vilvl_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vilvl_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vilvl_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vilvl_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vilvh_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vilvh_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vilvh_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vilvh_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vshuf_b, "V16UcV16UcV16UcV16Uc", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vshuf_h, "V8sV8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vshuf_w, "V4iV4iV4iV4i", 
"nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vshuf_d, "V2LLiV2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vshuf4i_b, "V16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vshuf4i_h, "V8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vshuf4i_w, "V4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vshuf4i_d, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vpermi_w, "V4iV4iV4iIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vextrins_b, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vextrins_h, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vextrins_w, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vextrins_d, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vld, "V16ScvC*Ii", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vst, "vV16Scv*Ii", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vldx, "V16ScvC*LLi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vstx, "vV16Scv*LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vldrepl_b, "V16cvC*Ii", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vldrepl_h, "V8svC*Ii", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vldrepl_w, "V4ivC*Ii", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vldrepl_d, "V2LLivC*Ii", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vstelm_b, "vV16Scv*IiUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vstelm_h, "vV8Ssv*IiUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vstelm_w, "vV4Siv*IiUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vstelm_d, "vV2SLLiv*IiUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_bz_v, "iV16Uc", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_bnz_v, "iV16Uc", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_bz_b, "iV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_bz_h, "iV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_bz_w, "iV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_bz_d, "iV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_bnz_b, "iV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_bnz_h, "iV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_bnz_w, "iV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_bnz_d, "iV2ULLi", "nc", "lsx") +diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt +index db47de2ad965..1d5573b71e6d 100644 +--- a/clang/lib/Headers/CMakeLists.txt ++++ b/clang/lib/Headers/CMakeLists.txt +@@ -78,6 +78,7 @@ set(hlsl_files + + set(loongarch_files + larchintrin.h ++ lsxintrin.h + ) + + set(mips_msa_files +diff --git a/clang/lib/Headers/lsxintrin.h b/clang/lib/Headers/lsxintrin.h +new file mode 100644 +index 000000000000..a29bc7757ab5 +--- /dev/null ++++ b/clang/lib/Headers/lsxintrin.h +@@ -0,0 +1,3726 @@ ++/*===------------- lsxintrin.h - LoongArch LSX intrinsics ------------------=== ++ * ++ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++ * See https://llvm.org/LICENSE.txt for license information. 
++ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++ * ++ *===-----------------------------------------------------------------------=== ++ */ ++ ++#ifndef _LOONGSON_SXINTRIN_H ++#define _LOONGSON_SXINTRIN_H 1 ++ ++#if defined(__loongarch_sx) ++typedef signed char v16i8 __attribute__((vector_size(16), aligned(16))); ++typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1))); ++typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1))); ++typedef short v8i16 __attribute__((vector_size(16), aligned(16))); ++typedef short v8i16_h __attribute__((vector_size(16), aligned(2))); ++typedef unsigned short v8u16 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2))); ++typedef int v4i32 __attribute__((vector_size(16), aligned(16))); ++typedef int v4i32_w __attribute__((vector_size(16), aligned(4))); ++typedef unsigned int v4u32 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4))); ++typedef long long v2i64 __attribute__((vector_size(16), aligned(16))); ++typedef long long v2i64_d __attribute__((vector_size(16), aligned(8))); ++typedef unsigned long long v2u64 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8))); ++typedef float v4f32 __attribute__((vector_size(16), aligned(16))); ++typedef float v4f32_w __attribute__((vector_size(16), aligned(4))); ++typedef double v2f64 __attribute__((vector_size(16), aligned(16))); ++typedef double v2f64_d __attribute__((vector_size(16), aligned(8))); ++ ++typedef long long __m128i __attribute__((__vector_size__(16), __may_alias__)); ++typedef float __m128 __attribute__((__vector_size__(16), __may_alias__)); ++typedef double __m128d __attribute__((__vector_size__(16), __may_alias__)); ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsll_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsll_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsll_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsll_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsll_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsll_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsll_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsll_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vslli_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vslli_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vslli_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vslli_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vslli_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslli_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vslli_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vslli_d((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsra_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsra_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
__m128i ++ __lsx_vsra_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsra_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsra_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsra_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsra_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsra_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vsrai_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrai_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vsrai_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrai_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vsrai_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrai_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vsrai_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrai_d((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrar_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrar_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrar_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrar_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrar_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrar_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrar_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrar_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vsrari_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrari_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vsrari_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrari_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vsrari_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrari_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vsrari_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrari_d((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrl_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrl_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrl_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrl_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrl_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrl_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrl_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrl_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vsrli_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrli_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vsrli_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrli_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vsrli_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrli_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vsrli_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrli_d((v2i64)(_1), 
(_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlr_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlr_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlr_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlr_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlr_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlr_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlr_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlr_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vsrlri_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrlri_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vsrlri_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrlri_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vsrlri_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrlri_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vsrlri_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrlri_d((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitclr_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitclr_b((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitclr_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitclr_h((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitclr_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitclr_w((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitclr_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitclr_d((v2u64)_1, (v2u64)_2); ++} ++ ++#define __lsx_vbitclri_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitclri_b((v16u8)(_1), (_2))) ++ ++#define __lsx_vbitclri_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitclri_h((v8u16)(_1), (_2))) ++ ++#define __lsx_vbitclri_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitclri_w((v4u32)(_1), (_2))) ++ ++#define __lsx_vbitclri_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitclri_d((v2u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitset_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitset_b((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitset_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitset_h((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitset_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitset_w((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitset_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitset_d((v2u64)_1, (v2u64)_2); ++} ++ ++#define __lsx_vbitseti_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ 
((__m128i)__builtin_lsx_vbitseti_b((v16u8)(_1), (_2))) ++ ++#define __lsx_vbitseti_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitseti_h((v8u16)(_1), (_2))) ++ ++#define __lsx_vbitseti_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitseti_w((v4u32)(_1), (_2))) ++ ++#define __lsx_vbitseti_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitseti_d((v2u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitrev_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitrev_b((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitrev_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitrev_h((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitrev_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitrev_w((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitrev_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitrev_d((v2u64)_1, (v2u64)_2); ++} ++ ++#define __lsx_vbitrevi_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitrevi_b((v16u8)(_1), (_2))) ++ ++#define __lsx_vbitrevi_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitrevi_h((v8u16)(_1), (_2))) ++ ++#define __lsx_vbitrevi_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitrevi_w((v4u32)(_1), (_2))) ++ ++#define __lsx_vbitrevi_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitrevi_d((v2u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vaddi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vaddi_bu((v16i8)(_1), (_2))) ++ ++#define __lsx_vaddi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vaddi_hu((v8i16)(_1), (_2))) ++ ++#define __lsx_vaddi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vaddi_wu((v4i32)(_1), (_2))) ++ ++#define __lsx_vaddi_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vaddi_du((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ 
__lsx_vsub_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vsubi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsubi_bu((v16i8)(_1), (_2))) ++ ++#define __lsx_vsubi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsubi_hu((v8i16)(_1), (_2))) ++ ++#define __lsx_vsubi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsubi_wu((v4i32)(_1), (_2))) ++ ++#define __lsx_vsubi_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsubi_du((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vmaxi_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vmaxi_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vmaxi_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vmaxi_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_d((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_du((v2u64)_1, (v2u64)_2); ++} ++ ++#define __lsx_vmaxi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_bu((v16u8)(_1), (_2))) ++ ++#define __lsx_vmaxi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_hu((v8u16)(_1), (_2))) ++ ++#define __lsx_vmaxi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_wu((v4u32)(_1), (_2))) ++ ++#define __lsx_vmaxi_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_du((v2u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_b((v16i8)_1, (v16i8)_2); ++} 
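The wrappers above show the two shapes the header uses: ordinary inline functions for register operands, and macros (e.g. the *_i immediate forms) where the LSX instruction encodes the operand as an immediate and the argument must therefore be a constant expression. A small usage sketch — not part of the patch, assuming loongarch64 clang with -mlsx — combining two of the wrappers defined above:

    #include <lsxintrin.h>

    /* Byte-wise add the two vectors, then shift every byte left by one.
       __lsx_vslli_b is a macro because its shift amount is a ui3
       immediate and must be known at compile time. */
    __m128i add_then_double(__m128i a, __m128i b) {
      __m128i sum = __lsx_vadd_b(a, b);
      return __lsx_vslli_b(sum, 1);
    }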
++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vmini_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vmini_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vmini_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vmini_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_d((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_du((v2u64)_1, (v2u64)_2); ++} ++ ++#define __lsx_vmini_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_bu((v16u8)(_1), (_2))) ++ ++#define __lsx_vmini_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_hu((v8u16)(_1), (_2))) ++ ++#define __lsx_vmini_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_wu((v4u32)(_1), (_2))) ++ ++#define __lsx_vmini_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_du((v2u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vseq_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vseq_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vseq_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vseq_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vseq_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vseq_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vseq_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vseq_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vseqi_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vseqi_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vseqi_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vseqi_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vseqi_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vseqi_w((v4i32)(_1), (_2))) ++ ++#define 
__lsx_vseqi_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vseqi_d((v2i64)(_1), (_2))) ++ ++#define __lsx_vslti_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_b((v16i8)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vslti_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vslti_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vslti_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_d((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_du((v2u64)_1, (v2u64)_2); ++} ++ ++#define __lsx_vslti_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_bu((v16u8)(_1), (_2))) ++ ++#define __lsx_vslti_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_hu((v8u16)(_1), (_2))) ++ ++#define __lsx_vslti_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_wu((v4u32)(_1), (_2))) ++ ++#define __lsx_vslti_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_du((v2u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vslei_b(/*__m128i*/ _1, 
/*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vslei_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vslei_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vslei_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_d((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_du((v2u64)_1, (v2u64)_2); ++} ++ ++#define __lsx_vslei_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_bu((v16u8)(_1), (_2))) ++ ++#define __lsx_vslei_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_hu((v8u16)(_1), (_2))) ++ ++#define __lsx_vslei_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_wu((v4u32)(_1), (_2))) ++ ++#define __lsx_vslei_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_du((v2u64)(_1), (_2))) ++ ++#define __lsx_vsat_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vsat_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vsat_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vsat_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_d((v2i64)(_1), (_2))) ++ ++#define __lsx_vsat_bu(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_bu((v16u8)(_1), (_2))) ++ ++#define __lsx_vsat_hu(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_hu((v8u16)(_1), (_2))) ++ ++#define __lsx_vsat_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_wu((v4u32)(_1), (_2))) ++ ++#define __lsx_vsat_du(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_du((v2u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadda_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadda_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadda_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadda_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadda_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadda_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadda_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadda_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_b(__m128i 
_1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ 
__lsx_vavgr_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmul_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmul_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmul_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmul_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmul_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmul_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmul_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmul_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmadd_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmadd_b((v16i8)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmadd_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmadd_h((v8i16)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmadd_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmadd_w((v4i32)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmadd_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmadd_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsub_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmsub_b((v16i8)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsub_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmsub_h((v8i16)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ 
__lsx_vmsub_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmsub_w((v4i32)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsub_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmsub_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_hu_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_hu_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_wu_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_wu_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_du_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_du_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_h_b((v16i8)_1, 
(v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_hu_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_hu_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_wu_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_wu_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_du_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_du_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplve_b(__m128i _1, int _2) { ++ return (__m128i)__builtin_lsx_vreplve_b((v16i8)_1, (int)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplve_h(__m128i _1, int _2) { ++ return (__m128i)__builtin_lsx_vreplve_h((v8i16)_1, (int)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplve_w(__m128i _1, int _2) { ++ return (__m128i)__builtin_lsx_vreplve_w((v4i32)_1, (int)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplve_d(__m128i _1, int _2) { ++ return 
(__m128i)__builtin_lsx_vreplve_d((v2i64)_1, (int)_2); ++} ++ ++#define __lsx_vreplvei_b(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vreplvei_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vreplvei_h(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vreplvei_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vreplvei_w(/*__m128i*/ _1, /*ui2*/ _2) \ ++ ((__m128i)__builtin_lsx_vreplvei_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vreplvei_d(/*__m128i*/ _1, /*ui1*/ _2) \ ++ ((__m128i)__builtin_lsx_vreplvei_d((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickev_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickev_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickev_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickev_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickev_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickev_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickev_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickev_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickod_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickod_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickod_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickod_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickod_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickod_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickod_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickod_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvh_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvh_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvh_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvh_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvh_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvh_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvh_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvh_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvl_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvl_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvl_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvl_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvl_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvl_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvl_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvl_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackev_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackev_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackev_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackev_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackev_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackev_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackev_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackev_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackod_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackod_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackod_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackod_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackod_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackod_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackod_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackod_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vshuf_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vshuf_h((v8i16)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vshuf_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vshuf_w((v4i32)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vshuf_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vshuf_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vand_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vand_v((v16u8)_1, (v16u8)_2); ++} ++ ++#define __lsx_vandi_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vandi_b((v16u8)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vor_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vor_v((v16u8)_1, (v16u8)_2); ++} ++ ++#define __lsx_vori_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vori_b((v16u8)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ 
__lsx_vnor_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vnor_v((v16u8)_1, (v16u8)_2); ++} ++ ++#define __lsx_vnori_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vnori_b((v16u8)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vxor_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vxor_v((v16u8)_1, (v16u8)_2); ++} ++ ++#define __lsx_vxori_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vxori_b((v16u8)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitsel_v(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vbitsel_v((v16u8)_1, (v16u8)_2, (v16u8)_3); ++} ++ ++#define __lsx_vbitseli_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vbitseli_b((v16u8)(_1), (v16u8)(_2), (_3))) ++ ++#define __lsx_vshuf4i_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vshuf4i_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vshuf4i_h(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vshuf4i_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vshuf4i_w(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vshuf4i_w((v4i32)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplgr2vr_b(int _1) { ++ return (__m128i)__builtin_lsx_vreplgr2vr_b((int)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplgr2vr_h(int _1) { ++ return (__m128i)__builtin_lsx_vreplgr2vr_h((int)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplgr2vr_w(int _1) { ++ return (__m128i)__builtin_lsx_vreplgr2vr_w((int)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplgr2vr_d(long int _1) { ++ return (__m128i)__builtin_lsx_vreplgr2vr_d((long int)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpcnt_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vpcnt_b((v16i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpcnt_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vpcnt_h((v8i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpcnt_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vpcnt_w((v4i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpcnt_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vpcnt_d((v2i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclo_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclo_b((v16i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclo_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclo_h((v8i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclo_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclo_w((v4i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclo_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclo_d((v2i64)_1); ++} ++ ++extern __inline 
++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclz_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclz_b((v16i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclz_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclz_h((v8i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclz_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclz_w((v4i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclz_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclz_d((v2i64)_1); ++} ++ ++#define __lsx_vpickve2gr_b(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((int)__builtin_lsx_vpickve2gr_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vpickve2gr_h(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((int)__builtin_lsx_vpickve2gr_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vpickve2gr_w(/*__m128i*/ _1, /*ui2*/ _2) \ ++ ((int)__builtin_lsx_vpickve2gr_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vpickve2gr_d(/*__m128i*/ _1, /*ui1*/ _2) \ ++ ((long int)__builtin_lsx_vpickve2gr_d((v2i64)(_1), (_2))) ++ ++#define __lsx_vpickve2gr_bu(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((unsigned int)__builtin_lsx_vpickve2gr_bu((v16i8)(_1), (_2))) ++ ++#define __lsx_vpickve2gr_hu(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((unsigned int)__builtin_lsx_vpickve2gr_hu((v8i16)(_1), (_2))) ++ ++#define __lsx_vpickve2gr_wu(/*__m128i*/ _1, /*ui2*/ _2) \ ++ ((unsigned int)__builtin_lsx_vpickve2gr_wu((v4i32)(_1), (_2))) ++ ++#define __lsx_vpickve2gr_du(/*__m128i*/ _1, /*ui1*/ _2) \ ++ ((unsigned long int)__builtin_lsx_vpickve2gr_du((v2i64)(_1), (_2))) ++ ++#define __lsx_vinsgr2vr_b(/*__m128i*/ _1, /*int*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vinsgr2vr_b((v16i8)(_1), (int)(_2), (_3))) ++ ++#define __lsx_vinsgr2vr_h(/*__m128i*/ _1, /*int*/ _2, /*ui3*/ _3) \ ++ ((__m128i)__builtin_lsx_vinsgr2vr_h((v8i16)(_1), (int)(_2), (_3))) ++ ++#define __lsx_vinsgr2vr_w(/*__m128i*/ _1, /*int*/ _2, /*ui2*/ _3) \ ++ ((__m128i)__builtin_lsx_vinsgr2vr_w((v4i32)(_1), (int)(_2), (_3))) ++ ++#define __lsx_vinsgr2vr_d(/*__m128i*/ _1, /*long int*/ _2, /*ui1*/ _3) \ ++ ((__m128i)__builtin_lsx_vinsgr2vr_d((v2i64)(_1), (long int)(_2), (_3))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfadd_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfadd_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfadd_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfadd_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfsub_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfsub_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfsub_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfsub_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmul_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmul_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmul_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmul_d((v2f64)_1, (v2f64)_2); ++} ++ 
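/* -- Editor's aside (illustrative only, not part of the patch hunk above) --
 * A minimal usage sketch for the lane-access macros and register-replicate
 * wrapper defined around this point, assuming the header added by this patch
 * is installed as <lsxintrin.h> and the translation unit is built with -mlsx.
 */
#include <lsxintrin.h>

int lsx_lane_demo(void) {
  __m128i v = __lsx_vreplgr2vr_w(7);   /* broadcast 7 into all four i32 lanes */
  v = __lsx_vinsgr2vr_w(v, 42, 2);     /* overwrite lane 2 (ui2 immediate)    */
  return __lsx_vpickve2gr_w(v, 2);     /* read lane 2 back: returns 42        */
}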
++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfdiv_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfdiv_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfdiv_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfdiv_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcvt_h_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcvt_h_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfcvt_s_d(__m128d _1, __m128d _2) { ++ return (__m128)__builtin_lsx_vfcvt_s_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmin_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmin_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmin_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmin_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmina_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmina_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmina_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmina_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmax_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmax_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmax_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmax_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmaxa_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmaxa_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmaxa_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmaxa_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfclass_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vfclass_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfclass_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vfclass_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfsqrt_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfsqrt_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfsqrt_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfsqrt_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrecip_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrecip_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ 
__lsx_vfrecip_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrecip_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrint_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrint_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrint_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrint_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrsqrt_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrsqrt_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrsqrt_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrsqrt_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vflogb_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vflogb_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vflogb_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vflogb_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfcvth_s_h(__m128i _1) { ++ return (__m128)__builtin_lsx_vfcvth_s_h((v8i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfcvth_d_s(__m128 _1) { ++ return (__m128d)__builtin_lsx_vfcvth_d_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfcvtl_s_h(__m128i _1) { ++ return (__m128)__builtin_lsx_vfcvtl_s_h((v8i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfcvtl_d_s(__m128 _1) { ++ return (__m128d)__builtin_lsx_vfcvtl_d_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftint_w_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftint_l_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_wu_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftint_wu_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_lu_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftint_lu_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrz_w_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrz_l_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_wu_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrz_wu_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_lu_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrz_lu_d((v2f64)_1); ++} ++ 
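/* -- Editor's aside (illustrative only, not part of the patch hunk above) --
 * Hedged round-trip sketch for the nearby conversion wrappers:
 * __lsx_vffint_s_w (i32 -> f32, defined just below) and __lsx_vftintrz_w_s
 * (f32 -> i32 with truncation). Same assumptions as before: <lsxintrin.h>
 * installed by this patch and -mlsx enabled.
 */
#include <lsxintrin.h>

int lsx_cvt_roundtrip(void) {
  __m128i i0 = __lsx_vreplgr2vr_w(-3);  /* {-3, -3, -3, -3}                 */
  __m128 f = __lsx_vffint_s_w(i0);      /* {-3.0f, ...}                     */
  f = __lsx_vfmul_s(f, f);              /* square each lane: {9.0f, ...}    */
  __m128i i1 = __lsx_vftintrz_w_s(f);   /* truncate toward zero back to i32 */
  return __lsx_vpickve2gr_w(i1, 0);     /* 9                                */
}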
++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vffint_s_w(__m128i _1) { ++ return (__m128)__builtin_lsx_vffint_s_w((v4i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vffint_d_l(__m128i _1) { ++ return (__m128d)__builtin_lsx_vffint_d_l((v2i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vffint_s_wu(__m128i _1) { ++ return (__m128)__builtin_lsx_vffint_s_wu((v4u32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vffint_d_lu(__m128i _1) { ++ return (__m128d)__builtin_lsx_vffint_d_lu((v2u64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vandn_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vandn_v((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vneg_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vneg_b((v16i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vneg_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vneg_h((v8i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vneg_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vneg_w((v4i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vneg_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vneg_d((v2i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_du((v2u64)_1, (v2u64)_2); ++} ++ ++#define __lsx_vsllwil_h_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_h_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vsllwil_w_h(/*__m128i*/ 
_1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_w_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vsllwil_d_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_d_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vsllwil_hu_bu(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_hu_bu((v16u8)(_1), (_2))) ++ ++#define __lsx_vsllwil_wu_hu(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_wu_hu((v8u16)(_1), (_2))) ++ ++#define __lsx_vsllwil_du_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_du_wu((v4u32)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsran_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsran_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsran_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsran_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsran_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsran_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_bu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_bu_h((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_hu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_hu_w((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_wu_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_wu_d((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrarn_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrarn_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrarn_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrarn_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrarn_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrarn_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_h_w(__m128i _1, __m128i _2) { ++ return 
(__m128i)__builtin_lsx_vssrarn_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_bu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_bu_h((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_hu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_hu_w((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_wu_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_wu_d((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrln_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrln_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrln_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrln_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrln_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrln_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_bu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_bu_h((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_hu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_hu_w((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_wu_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_wu_d((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlrn_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlrn_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlrn_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlrn_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlrn_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlrn_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_bu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_bu_h((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_hu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_hu_w((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_wu_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_wu_d((v2u64)_1, (v2u64)_2); ++} ++ 
++#define __lsx_vfrstpi_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vfrstpi_b((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vfrstpi_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vfrstpi_h((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfrstp_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vfrstp_b((v16i8)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfrstp_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vfrstp_h((v8i16)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++#define __lsx_vshuf4i_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vshuf4i_d((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vbsrl_v(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbsrl_v((v16i8)(_1), (_2))) ++ ++#define __lsx_vbsll_v(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbsll_v((v16i8)(_1), (_2))) ++ ++#define __lsx_vextrins_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vextrins_b((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vextrins_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vextrins_h((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vextrins_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vextrins_w((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vextrins_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vextrins_d((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskltz_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskltz_b((v16i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskltz_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskltz_h((v8i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskltz_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskltz_w((v4i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskltz_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskltz_d((v2i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsigncov_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsigncov_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsigncov_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsigncov_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsigncov_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsigncov_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsigncov_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsigncov_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmadd_s(__m128 _1, __m128 _2, __m128 _3) { ++ return (__m128)__builtin_lsx_vfmadd_s((v4f32)_1, 
(v4f32)_2, (v4f32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmadd_d(__m128d _1, __m128d _2, __m128d _3) { ++ return (__m128d)__builtin_lsx_vfmadd_d((v2f64)_1, (v2f64)_2, (v2f64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmsub_s(__m128 _1, __m128 _2, __m128 _3) { ++ return (__m128)__builtin_lsx_vfmsub_s((v4f32)_1, (v4f32)_2, (v4f32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmsub_d(__m128d _1, __m128d _2, __m128d _3) { ++ return (__m128d)__builtin_lsx_vfmsub_d((v2f64)_1, (v2f64)_2, (v2f64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfnmadd_s(__m128 _1, __m128 _2, __m128 _3) { ++ return (__m128)__builtin_lsx_vfnmadd_s((v4f32)_1, (v4f32)_2, (v4f32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfnmadd_d(__m128d _1, __m128d _2, __m128d _3) { ++ return (__m128d)__builtin_lsx_vfnmadd_d((v2f64)_1, (v2f64)_2, (v2f64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfnmsub_s(__m128 _1, __m128 _2, __m128 _3) { ++ return (__m128)__builtin_lsx_vfnmsub_s((v4f32)_1, (v4f32)_2, (v4f32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfnmsub_d(__m128d _1, __m128d _2, __m128d _3) { ++ return (__m128d)__builtin_lsx_vfnmsub_d((v2f64)_1, (v2f64)_2, (v2f64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrne_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrne_w_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrne_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrne_l_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrp_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrp_w_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrp_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrp_l_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrm_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrm_w_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrm_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrm_l_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftint_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vffint_s_l(__m128i _1, __m128i _2) { ++ return (__m128)__builtin_lsx_vffint_s_l((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftintrz_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m128i ++ __lsx_vftintrp_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftintrp_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrm_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftintrm_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrne_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftintrne_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintl_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintl_l_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftinth_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftinth_l_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vffinth_d_w(__m128i _1) { ++ return (__m128d)__builtin_lsx_vffinth_d_w((v4i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vffintl_d_w(__m128i _1) { ++ return (__m128d)__builtin_lsx_vffintl_d_w((v4i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrzl_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrzl_l_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrzh_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrzh_l_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrpl_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrpl_l_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrph_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrph_l_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrml_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrml_l_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrmh_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrmh_l_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrnel_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrnel_l_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrneh_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrneh_l_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrintrne_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrintrne_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrintrne_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrintrne_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrintrz_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrintrz_s((v4f32)_1); ++} ++ 
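/* -- Editor's aside (illustrative only, not part of the patch hunk above) --
 * Sketch of the fused multiply-add wrapper defined above: __lsx_vfmadd_s is
 * expected to compute, per f32 lane, _1 * _2 + _3 as a single fused operation
 * (mirroring vfmadd.s). Assumptions as before: <lsxintrin.h> and -mlsx.
 */
#include <lsxintrin.h>

int lsx_fmadd_demo(void) {
  __m128 a = __lsx_vffint_s_w(__lsx_vreplgr2vr_w(2));   /* {2.0f, ...}  */
  __m128 b = __lsx_vffint_s_w(__lsx_vreplgr2vr_w(3));   /* {3.0f, ...}  */
  __m128 c = __lsx_vffint_s_w(__lsx_vreplgr2vr_w(4));   /* {4.0f, ...}  */
  __m128 r = __lsx_vfmadd_s(a, b, c);                   /* {10.0f, ...} */
  return __lsx_vpickve2gr_w(__lsx_vftintrz_w_s(r), 0);  /* 10           */
}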
++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrintrz_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrintrz_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrintrp_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrintrp_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrintrp_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrintrp_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrintrm_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrintrm_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrintrm_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrintrm_d((v2f64)_1); ++} ++ ++#define __lsx_vstelm_b(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ++ ((void)__builtin_lsx_vstelm_b((v16i8)(_1), (void *)(_2), (_3), (_4))) ++ ++#define __lsx_vstelm_h(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ++ ((void)__builtin_lsx_vstelm_h((v8i16)(_1), (void *)(_2), (_3), (_4))) ++ ++#define __lsx_vstelm_w(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ++ ((void)__builtin_lsx_vstelm_w((v4i32)(_1), (void *)(_2), (_3), (_4))) ++ ++#define __lsx_vstelm_d(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ++ ((void)__builtin_lsx_vstelm_d((v2i64)(_1), (void *)(_2), (_3), (_4))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_h_bu((v16u8)_1, (v16u8)_2); 
++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_d_wu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_d_wu_w((v4u32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_w_hu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_w_hu_h((v8u16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_h_bu_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_h_bu_b((v16u8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_d_wu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_d_wu_w((v4u32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_w_hu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_w_hu_h((v8u16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_h_bu_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_h_bu_b((v16u8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern 
__inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_q_du_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_q_du_d((v2u64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_q_du_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_q_du_d((v2u64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_d_wu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_d_wu_w((v4u32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_w_hu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_w_hu_h((v8u16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_h_bu_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_h_bu_b((v16u8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_d_wu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_d_wu_w((v4u32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_w_hu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_w_hu_h((v8u16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_h_bu_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_h_bu_b((v16u8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_q_du_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_q_du_d((v2u64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_q_du_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_q_du_d((v2u64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_qu_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_qu_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_qu_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_qu_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_d_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_d_w((v2i64)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_w_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_w_h((v4i32)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_h_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_h_b((v8i16)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_d_wu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_d_wu((v2u64)_1, (v4u32)_2, (v4u32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_w_hu(__m128i _1, __m128i _2, __m128i _3) { ++ return 
(__m128i)__builtin_lsx_vmaddwev_w_hu((v4u32)_1, (v8u16)_2, (v8u16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_h_bu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_h_bu((v8u16)_1, (v16u8)_2, (v16u8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_d_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_d_w((v2i64)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_w_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_w_h((v4i32)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_h_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_h_b((v8i16)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_d_wu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_d_wu((v2u64)_1, (v4u32)_2, (v4u32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_w_hu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_w_hu((v4u32)_1, (v8u16)_2, (v8u16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_h_bu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_h_bu((v8u16)_1, (v16u8)_2, (v16u8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_d_wu_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_d_wu_w((v2i64)_1, (v4u32)_2, ++ (v4i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_w_hu_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_w_hu_h((v4i32)_1, (v8u16)_2, ++ (v8i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_h_bu_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_h_bu_b((v8i16)_1, (v16u8)_2, ++ (v16i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_d_wu_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_d_wu_w((v2i64)_1, (v4u32)_2, ++ (v4i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_w_hu_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_w_hu_h((v4i32)_1, (v8u16)_2, ++ (v8i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_h_bu_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_h_bu_b((v8i16)_1, (v16u8)_2, ++ (v16i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_q_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_q_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++extern 
__inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_q_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_q_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_q_du(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_q_du((v2u64)_1, (v2u64)_2, (v2u64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_q_du(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_q_du((v2u64)_1, (v2u64)_2, (v2u64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_q_du_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_q_du_d((v2i64)_1, (v2u64)_2, ++ (v2i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_q_du_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_q_du_d((v2i64)_1, (v2u64)_2, ++ (v2i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vrotr_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vrotr_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vrotr_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vrotr_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vrotr_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vrotr_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vrotr_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vrotr_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_q(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_q((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_q(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_q((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vldrepl_b(/*void **/ _1, /*si12*/ _2) \ ++ ((__m128i)__builtin_lsx_vldrepl_b((void const *)(_1), (_2))) ++ ++#define __lsx_vldrepl_h(/*void **/ _1, /*si11*/ _2) \ ++ ((__m128i)__builtin_lsx_vldrepl_h((void const *)(_1), (_2))) ++ ++#define __lsx_vldrepl_w(/*void **/ _1, /*si10*/ _2) \ ++ ((__m128i)__builtin_lsx_vldrepl_w((void const *)(_1), (_2))) ++ ++#define __lsx_vldrepl_d(/*void **/ _1, /*si9*/ _2) \ ++ ((__m128i)__builtin_lsx_vldrepl_d((void const *)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskgez_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskgez_b((v16i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsknz_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmsknz_b((v16i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_h_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_h_b((v16i8)_1); ++} ++ ++extern 
__inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_w_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_w_h((v8i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_d_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_d_w((v4i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_q_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_q_d((v2i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_hu_bu(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_hu_bu((v16u8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_wu_hu(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_wu_hu((v8u16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_du_wu(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_du_wu((v4u32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_qu_du(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_qu_du((v2u64)_1); ++} ++ ++#define __lsx_vrotri_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vrotri_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vrotri_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vrotri_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vrotri_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vrotri_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vrotri_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vrotri_d((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vextl_q_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vextl_q_d((v2i64)_1); ++} ++ ++#define __lsx_vsrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vsrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vsrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vsrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vsrlrni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlrni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vsrlrni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlrni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vsrlrni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlrni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vsrlrni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlrni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vssrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vssrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vssrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ 
((__m128i)__builtin_lsx_vssrlni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vssrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vssrlni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vssrlni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vssrlni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vssrlni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vssrlrni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vssrlrni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vssrlrni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vssrlrni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vssrlrni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vssrlrni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vssrlrni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vssrlrni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vsrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrani_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vsrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrani_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vsrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrani_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vsrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrani_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vsrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrarni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vsrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrarni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vsrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrarni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vsrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrarni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vssrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vssrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++#define 
__lsx_vssrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vssrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vssrani_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vssrani_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vssrani_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vssrani_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_du_q((v2u64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vssrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vssrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vssrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vssrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vssrarni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vssrarni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vssrarni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vssrarni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vpermi_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vpermi_w((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vld(/*void **/ _1, /*si12*/ _2) \ ++ ((__m128i)__builtin_lsx_vld((void const *)(_1), (_2))) ++ ++#define __lsx_vst(/*__m128i*/ _1, /*void **/ _2, /*si12*/ _3) \ ++ ((void)__builtin_lsx_vst((v16i8)(_1), (void *)(_2), (_3))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_h_w(__m128i _1, __m128i _2) { ++ return 
(__m128i)__builtin_lsx_vssrln_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vorn_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vorn_v((v16i8)_1, (v16i8)_2); ++} ++ ++#define __lsx_vldi(/*i13*/ _1) ((__m128i)__builtin_lsx_vldi((_1))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vshuf_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vshuf_b((v16i8)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vldx(void const *_1, long int _2) { ++ return (__m128i)__builtin_lsx_vldx((void const *)_1, (long int)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) void ++ __lsx_vstx(__m128i _1, void *_2, long int _3) { ++ return (void)__builtin_lsx_vstx((v16i8)_1, (void *)_2, (long int)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vextl_qu_du(__m128i _1) { ++ return (__m128i)__builtin_lsx_vextl_qu_du((v2u64)_1); ++} ++ ++#define __lsx_bnz_b(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_b((v16u8)(_1))) ++ ++#define __lsx_bnz_d(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_d((v2u64)(_1))) ++ ++#define __lsx_bnz_h(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_h((v8u16)(_1))) ++ ++#define __lsx_bnz_v(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_v((v16u8)(_1))) ++ ++#define __lsx_bnz_w(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_w((v4u32)(_1))) ++ ++#define __lsx_bz_b(/*__m128i*/ _1) ((int)__builtin_lsx_bz_b((v16u8)(_1))) ++ ++#define __lsx_bz_d(/*__m128i*/ _1) ((int)__builtin_lsx_bz_d((v2u64)(_1))) ++ ++#define __lsx_bz_h(/*__m128i*/ _1) ((int)__builtin_lsx_bz_h((v8u16)(_1))) ++ ++#define __lsx_bz_v(/*__m128i*/ _1) ((int)__builtin_lsx_bz_v((v16u8)(_1))) ++ ++#define __lsx_bz_w(/*__m128i*/ _1) ((int)__builtin_lsx_bz_w((v4u32)(_1))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_caf_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_caf_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_caf_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_caf_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_ceq_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_ceq_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_ceq_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_ceq_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cle_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cle_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cle_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cle_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_clt_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_clt_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_clt_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_clt_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cne_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cne_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cne_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cne_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cor_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cor_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cor_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cor_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cueq_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cueq_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cueq_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cueq_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cule_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cule_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cule_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cule_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cult_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cult_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cult_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cult_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cun_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cun_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cune_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cune_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cune_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cune_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cun_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cun_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
__m128i ++ __lsx_vfcmp_saf_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_saf_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_saf_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_saf_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_seq_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_seq_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_seq_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_seq_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sle_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sle_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sle_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sle_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_slt_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_slt_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_slt_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_slt_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sne_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sne_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sne_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sne_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sor_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sor_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sor_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sor_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sueq_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sueq_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sueq_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sueq_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sule_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sule_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sule_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sule_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sult_d(__m128d _1, __m128d _2) { ++ return 
(__m128i)__builtin_lsx_vfcmp_sult_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sult_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sult_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sun_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sun_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sune_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sune_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sune_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sune_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sun_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sun_s((v4f32)_1, (v4f32)_2); ++} ++ ++#define __lsx_vrepli_b(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_b((_1))) ++ ++#define __lsx_vrepli_d(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_d((_1))) ++ ++#define __lsx_vrepli_h(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_h((_1))) ++ ++#define __lsx_vrepli_w(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_w((_1))) ++ ++#endif /* defined(__loongarch_sx) */ ++#endif /* _LOONGSON_SXINTRIN_H */ +diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp +index a8416bf4de92..d1b015502725 100644 +--- a/clang/lib/Sema/SemaChecking.cpp ++++ b/clang/lib/Sema/SemaChecking.cpp +@@ -3826,6 +3826,7 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, + switch (BuiltinID) { + default: + break; ++ // Basic intrinsics. + case LoongArch::BI__builtin_loongarch_cacop_d: + case LoongArch::BI__builtin_loongarch_cacop_w: { + SemaBuiltinConstantArgRange(TheCall, 0, 0, llvm::maxUIntN(5)); +@@ -3854,8 +3855,234 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, + case LoongArch::BI__builtin_loongarch_movfcsr2gr: + case LoongArch::BI__builtin_loongarch_movgr2fcsr: + return SemaBuiltinConstantArgRange(TheCall, 0, 0, llvm::maxUIntN(2)); +- } + ++ // LSX intrinsics. 
++ case LoongArch::BI__builtin_lsx_vbitclri_b: ++ case LoongArch::BI__builtin_lsx_vbitrevi_b: ++ case LoongArch::BI__builtin_lsx_vbitseti_b: ++ case LoongArch::BI__builtin_lsx_vsat_b: ++ case LoongArch::BI__builtin_lsx_vsat_bu: ++ case LoongArch::BI__builtin_lsx_vslli_b: ++ case LoongArch::BI__builtin_lsx_vsrai_b: ++ case LoongArch::BI__builtin_lsx_vsrari_b: ++ case LoongArch::BI__builtin_lsx_vsrli_b: ++ case LoongArch::BI__builtin_lsx_vsllwil_h_b: ++ case LoongArch::BI__builtin_lsx_vsllwil_hu_bu: ++ case LoongArch::BI__builtin_lsx_vrotri_b: ++ case LoongArch::BI__builtin_lsx_vsrlri_b: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); ++ case LoongArch::BI__builtin_lsx_vbitclri_h: ++ case LoongArch::BI__builtin_lsx_vbitrevi_h: ++ case LoongArch::BI__builtin_lsx_vbitseti_h: ++ case LoongArch::BI__builtin_lsx_vsat_h: ++ case LoongArch::BI__builtin_lsx_vsat_hu: ++ case LoongArch::BI__builtin_lsx_vslli_h: ++ case LoongArch::BI__builtin_lsx_vsrai_h: ++ case LoongArch::BI__builtin_lsx_vsrari_h: ++ case LoongArch::BI__builtin_lsx_vsrli_h: ++ case LoongArch::BI__builtin_lsx_vsllwil_w_h: ++ case LoongArch::BI__builtin_lsx_vsllwil_wu_hu: ++ case LoongArch::BI__builtin_lsx_vrotri_h: ++ case LoongArch::BI__builtin_lsx_vsrlri_h: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); ++ case LoongArch::BI__builtin_lsx_vssrarni_b_h: ++ case LoongArch::BI__builtin_lsx_vssrarni_bu_h: ++ case LoongArch::BI__builtin_lsx_vssrani_b_h: ++ case LoongArch::BI__builtin_lsx_vssrani_bu_h: ++ case LoongArch::BI__builtin_lsx_vsrarni_b_h: ++ case LoongArch::BI__builtin_lsx_vsrlni_b_h: ++ case LoongArch::BI__builtin_lsx_vsrlrni_b_h: ++ case LoongArch::BI__builtin_lsx_vssrlni_b_h: ++ case LoongArch::BI__builtin_lsx_vssrlni_bu_h: ++ case LoongArch::BI__builtin_lsx_vssrlrni_b_h: ++ case LoongArch::BI__builtin_lsx_vssrlrni_bu_h: ++ case LoongArch::BI__builtin_lsx_vsrani_b_h: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 15); ++ case LoongArch::BI__builtin_lsx_vslei_bu: ++ case LoongArch::BI__builtin_lsx_vslei_hu: ++ case LoongArch::BI__builtin_lsx_vslei_wu: ++ case LoongArch::BI__builtin_lsx_vslei_du: ++ case LoongArch::BI__builtin_lsx_vslti_bu: ++ case LoongArch::BI__builtin_lsx_vslti_hu: ++ case LoongArch::BI__builtin_lsx_vslti_wu: ++ case LoongArch::BI__builtin_lsx_vslti_du: ++ case LoongArch::BI__builtin_lsx_vmaxi_bu: ++ case LoongArch::BI__builtin_lsx_vmaxi_hu: ++ case LoongArch::BI__builtin_lsx_vmaxi_wu: ++ case LoongArch::BI__builtin_lsx_vmaxi_du: ++ case LoongArch::BI__builtin_lsx_vmini_bu: ++ case LoongArch::BI__builtin_lsx_vmini_hu: ++ case LoongArch::BI__builtin_lsx_vmini_wu: ++ case LoongArch::BI__builtin_lsx_vmini_du: ++ case LoongArch::BI__builtin_lsx_vaddi_bu: ++ case LoongArch::BI__builtin_lsx_vaddi_hu: ++ case LoongArch::BI__builtin_lsx_vaddi_wu: ++ case LoongArch::BI__builtin_lsx_vaddi_du: ++ case LoongArch::BI__builtin_lsx_vbitclri_w: ++ case LoongArch::BI__builtin_lsx_vbitrevi_w: ++ case LoongArch::BI__builtin_lsx_vbitseti_w: ++ case LoongArch::BI__builtin_lsx_vsat_w: ++ case LoongArch::BI__builtin_lsx_vsat_wu: ++ case LoongArch::BI__builtin_lsx_vslli_w: ++ case LoongArch::BI__builtin_lsx_vsrai_w: ++ case LoongArch::BI__builtin_lsx_vsrari_w: ++ case LoongArch::BI__builtin_lsx_vsrli_w: ++ case LoongArch::BI__builtin_lsx_vsllwil_d_w: ++ case LoongArch::BI__builtin_lsx_vsllwil_du_wu: ++ case LoongArch::BI__builtin_lsx_vsrlri_w: ++ case LoongArch::BI__builtin_lsx_vrotri_w: ++ case LoongArch::BI__builtin_lsx_vsubi_bu: ++ case LoongArch::BI__builtin_lsx_vsubi_hu: ++ case 
LoongArch::BI__builtin_lsx_vbsrl_v: ++ case LoongArch::BI__builtin_lsx_vbsll_v: ++ case LoongArch::BI__builtin_lsx_vsubi_wu: ++ case LoongArch::BI__builtin_lsx_vsubi_du: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 31); ++ case LoongArch::BI__builtin_lsx_vssrarni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrarni_hu_w: ++ case LoongArch::BI__builtin_lsx_vssrani_h_w: ++ case LoongArch::BI__builtin_lsx_vssrani_hu_w: ++ case LoongArch::BI__builtin_lsx_vsrarni_h_w: ++ case LoongArch::BI__builtin_lsx_vsrani_h_w: ++ case LoongArch::BI__builtin_lsx_vfrstpi_b: ++ case LoongArch::BI__builtin_lsx_vfrstpi_h: ++ case LoongArch::BI__builtin_lsx_vsrlni_h_w: ++ case LoongArch::BI__builtin_lsx_vsrlrni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrlni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrlni_hu_w: ++ case LoongArch::BI__builtin_lsx_vssrlrni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrlrni_hu_w: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 31); ++ case LoongArch::BI__builtin_lsx_vbitclri_d: ++ case LoongArch::BI__builtin_lsx_vbitrevi_d: ++ case LoongArch::BI__builtin_lsx_vbitseti_d: ++ case LoongArch::BI__builtin_lsx_vsat_d: ++ case LoongArch::BI__builtin_lsx_vsat_du: ++ case LoongArch::BI__builtin_lsx_vslli_d: ++ case LoongArch::BI__builtin_lsx_vsrai_d: ++ case LoongArch::BI__builtin_lsx_vsrli_d: ++ case LoongArch::BI__builtin_lsx_vsrari_d: ++ case LoongArch::BI__builtin_lsx_vrotri_d: ++ case LoongArch::BI__builtin_lsx_vsrlri_d: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 63); ++ case LoongArch::BI__builtin_lsx_vssrarni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrarni_wu_d: ++ case LoongArch::BI__builtin_lsx_vssrani_w_d: ++ case LoongArch::BI__builtin_lsx_vssrani_wu_d: ++ case LoongArch::BI__builtin_lsx_vsrarni_w_d: ++ case LoongArch::BI__builtin_lsx_vsrlni_w_d: ++ case LoongArch::BI__builtin_lsx_vsrlrni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrlni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrlni_wu_d: ++ case LoongArch::BI__builtin_lsx_vssrlrni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrlrni_wu_d: ++ case LoongArch::BI__builtin_lsx_vsrani_w_d: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 63); ++ case LoongArch::BI__builtin_lsx_vssrarni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrarni_du_q: ++ case LoongArch::BI__builtin_lsx_vssrani_d_q: ++ case LoongArch::BI__builtin_lsx_vssrani_du_q: ++ case LoongArch::BI__builtin_lsx_vsrarni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrlni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrlni_du_q: ++ case LoongArch::BI__builtin_lsx_vssrlrni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrlrni_du_q: ++ case LoongArch::BI__builtin_lsx_vsrani_d_q: ++ case LoongArch::BI__builtin_lsx_vsrlrni_d_q: ++ case LoongArch::BI__builtin_lsx_vsrlni_d_q: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 127); ++ case LoongArch::BI__builtin_lsx_vseqi_b: ++ case LoongArch::BI__builtin_lsx_vseqi_h: ++ case LoongArch::BI__builtin_lsx_vseqi_w: ++ case LoongArch::BI__builtin_lsx_vseqi_d: ++ case LoongArch::BI__builtin_lsx_vslti_b: ++ case LoongArch::BI__builtin_lsx_vslti_h: ++ case LoongArch::BI__builtin_lsx_vslti_w: ++ case LoongArch::BI__builtin_lsx_vslti_d: ++ case LoongArch::BI__builtin_lsx_vslei_b: ++ case LoongArch::BI__builtin_lsx_vslei_h: ++ case LoongArch::BI__builtin_lsx_vslei_w: ++ case LoongArch::BI__builtin_lsx_vslei_d: ++ case LoongArch::BI__builtin_lsx_vmaxi_b: ++ case LoongArch::BI__builtin_lsx_vmaxi_h: ++ case LoongArch::BI__builtin_lsx_vmaxi_w: ++ case LoongArch::BI__builtin_lsx_vmaxi_d: ++ case LoongArch::BI__builtin_lsx_vmini_b: ++ case 
LoongArch::BI__builtin_lsx_vmini_h: ++ case LoongArch::BI__builtin_lsx_vmini_w: ++ case LoongArch::BI__builtin_lsx_vmini_d: ++ return SemaBuiltinConstantArgRange(TheCall, 1, -16, 15); ++ case LoongArch::BI__builtin_lsx_vandi_b: ++ case LoongArch::BI__builtin_lsx_vnori_b: ++ case LoongArch::BI__builtin_lsx_vori_b: ++ case LoongArch::BI__builtin_lsx_vshuf4i_b: ++ case LoongArch::BI__builtin_lsx_vshuf4i_h: ++ case LoongArch::BI__builtin_lsx_vshuf4i_w: ++ case LoongArch::BI__builtin_lsx_vxori_b: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 255); ++ case LoongArch::BI__builtin_lsx_vbitseli_b: ++ case LoongArch::BI__builtin_lsx_vshuf4i_d: ++ case LoongArch::BI__builtin_lsx_vextrins_b: ++ case LoongArch::BI__builtin_lsx_vextrins_h: ++ case LoongArch::BI__builtin_lsx_vextrins_w: ++ case LoongArch::BI__builtin_lsx_vextrins_d: ++ case LoongArch::BI__builtin_lsx_vpermi_w: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 255); ++ case LoongArch::BI__builtin_lsx_vpickve2gr_b: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_bu: ++ case LoongArch::BI__builtin_lsx_vreplvei_b: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_b: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 15); ++ case LoongArch::BI__builtin_lsx_vpickve2gr_h: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_hu: ++ case LoongArch::BI__builtin_lsx_vreplvei_h: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_h: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 7); ++ case LoongArch::BI__builtin_lsx_vpickve2gr_w: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_wu: ++ case LoongArch::BI__builtin_lsx_vreplvei_w: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 3); ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_w: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 3); ++ case LoongArch::BI__builtin_lsx_vpickve2gr_d: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_du: ++ case LoongArch::BI__builtin_lsx_vreplvei_d: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1); ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_d: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 1); ++ case LoongArch::BI__builtin_lsx_vstelm_b: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 15); ++ case LoongArch::BI__builtin_lsx_vstelm_h: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -256, 254) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 7); ++ case LoongArch::BI__builtin_lsx_vstelm_w: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -512, 508) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 3); ++ case LoongArch::BI__builtin_lsx_vstelm_d: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -1024, 1016) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 1); ++ case LoongArch::BI__builtin_lsx_vldrepl_b: ++ case LoongArch::BI__builtin_lsx_vld: ++ return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2047); ++ case LoongArch::BI__builtin_lsx_vldrepl_h: ++ return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2046); ++ case LoongArch::BI__builtin_lsx_vldrepl_w: ++ return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2044); ++ case LoongArch::BI__builtin_lsx_vldrepl_d: ++ return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2040); ++ case LoongArch::BI__builtin_lsx_vst: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -2048, 2047); ++ case LoongArch::BI__builtin_lsx_vldi: ++ return SemaBuiltinConstantArgRange(TheCall, 0, -4096, 4095); ++ case LoongArch::BI__builtin_lsx_vrepli_b: ++ 
case LoongArch::BI__builtin_lsx_vrepli_h: ++ case LoongArch::BI__builtin_lsx_vrepli_w: ++ case LoongArch::BI__builtin_lsx_vrepli_d: ++ return SemaBuiltinConstantArgRange(TheCall, 0, -512, 511); ++ } + return false; + } + +-- +2.20.1 + diff --git a/0015-LoongArch-MC-Refine-MCInstrAnalysis-based-on-registe.patch b/0015-LoongArch-MC-Refine-MCInstrAnalysis-based-on-registe.patch new file mode 100644 index 0000000..ec17786 --- /dev/null +++ b/0015-LoongArch-MC-Refine-MCInstrAnalysis-based-on-registe.patch @@ -0,0 +1,240 @@ +From 0098ff513a67219cc9f647bf50e18505b264195c Mon Sep 17 00:00:00 2001 +From: ZhaoQi +Date: Fri, 10 Nov 2023 15:54:33 +0800 +Subject: [PATCH 15/27] [LoongArch][MC] Refine MCInstrAnalysis based on + registers used (#71276) + +MCInstrAnalysis can return properties of instructions (e.g., isCall(), +isBranch(),...) based on the informations that MCInstrDesc can get from +*InstrInfo*.td files. These infos are based on opcodes only, but JIRL +can have different properties based on different registers used. + +So this patch refines several MCInstrAnalysis methods: isTerminator, +isCall,isReturn,isBranch,isUnconditionalBranch and isIndirectBranch. + +This patch also allows BOLT which will be supported on LoongArch later +to get right instruction infos. + +(cherry picked from commit f7d784709673ca185f6fb0633fd53c72e81f2ae1) +--- + .../MCTargetDesc/LoongArchMCTargetDesc.cpp | 76 +++++++++++++ + .../unittests/Target/LoongArch/CMakeLists.txt | 1 + + .../Target/LoongArch/MCInstrAnalysisTest.cpp | 107 ++++++++++++++++++ + 3 files changed, 184 insertions(+) + create mode 100644 llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp + +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp +index 942e667bc261..d580c3457fec 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp +@@ -104,6 +104,82 @@ public: + + return false; + } ++ ++ bool isTerminator(const MCInst &Inst) const override { ++ if (MCInstrAnalysis::isTerminator(Inst)) ++ return true; ++ ++ switch (Inst.getOpcode()) { ++ default: ++ return false; ++ case LoongArch::JIRL: ++ return Inst.getOperand(0).getReg() == LoongArch::R0; ++ } ++ } ++ ++ bool isCall(const MCInst &Inst) const override { ++ if (MCInstrAnalysis::isCall(Inst)) ++ return true; ++ ++ switch (Inst.getOpcode()) { ++ default: ++ return false; ++ case LoongArch::JIRL: ++ return Inst.getOperand(0).getReg() != LoongArch::R0; ++ } ++ } ++ ++ bool isReturn(const MCInst &Inst) const override { ++ if (MCInstrAnalysis::isReturn(Inst)) ++ return true; ++ ++ switch (Inst.getOpcode()) { ++ default: ++ return false; ++ case LoongArch::JIRL: ++ return Inst.getOperand(0).getReg() == LoongArch::R0 && ++ Inst.getOperand(1).getReg() == LoongArch::R1; ++ } ++ } ++ ++ bool isBranch(const MCInst &Inst) const override { ++ if (MCInstrAnalysis::isBranch(Inst)) ++ return true; ++ ++ switch (Inst.getOpcode()) { ++ default: ++ return false; ++ case LoongArch::JIRL: ++ return Inst.getOperand(0).getReg() == LoongArch::R0 && ++ Inst.getOperand(1).getReg() != LoongArch::R1; ++ } ++ } ++ ++ bool isUnconditionalBranch(const MCInst &Inst) const override { ++ if (MCInstrAnalysis::isUnconditionalBranch(Inst)) ++ return true; ++ ++ switch (Inst.getOpcode()) { ++ default: ++ return false; ++ case LoongArch::JIRL: ++ return Inst.getOperand(0).getReg() == LoongArch::R0 && ++ Inst.getOperand(1).getReg() != LoongArch::R1; 
++ } ++ } ++ ++ bool isIndirectBranch(const MCInst &Inst) const override { ++ if (MCInstrAnalysis::isIndirectBranch(Inst)) ++ return true; ++ ++ switch (Inst.getOpcode()) { ++ default: ++ return false; ++ case LoongArch::JIRL: ++ return Inst.getOperand(0).getReg() == LoongArch::R0 && ++ Inst.getOperand(1).getReg() != LoongArch::R1; ++ } ++ } + }; + + } // end namespace +diff --git a/llvm/unittests/Target/LoongArch/CMakeLists.txt b/llvm/unittests/Target/LoongArch/CMakeLists.txt +index fef4f8e15461..e6f8ec073721 100644 +--- a/llvm/unittests/Target/LoongArch/CMakeLists.txt ++++ b/llvm/unittests/Target/LoongArch/CMakeLists.txt +@@ -20,6 +20,7 @@ set(LLVM_LINK_COMPONENTS + + add_llvm_target_unittest(LoongArchTests + InstSizes.cpp ++ MCInstrAnalysisTest.cpp + ) + + set_property(TARGET LoongArchTests PROPERTY FOLDER "Tests/UnitTests/TargetTests") +diff --git a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp +new file mode 100644 +index 000000000000..6a208d274a0d +--- /dev/null ++++ b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp +@@ -0,0 +1,107 @@ ++//===- MCInstrAnalysisTest.cpp - LoongArchMCInstrAnalysis unit tests ------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "llvm/MC/MCInstrAnalysis.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "llvm/MC/MCInstBuilder.h" ++#include "llvm/MC/TargetRegistry.h" ++#include "llvm/Support/TargetSelect.h" ++ ++#include "gtest/gtest.h" ++ ++#include <memory> ++ ++using namespace llvm; ++ ++namespace { ++ ++class InstrAnalysisTest : public testing::TestWithParam<const char *> { ++protected: ++ std::unique_ptr<const MCInstrInfo> Info; ++ std::unique_ptr<const MCInstrAnalysis> Analysis; ++ ++ static void SetUpTestSuite() { ++ LLVMInitializeLoongArchTargetInfo(); ++ LLVMInitializeLoongArchTarget(); ++ LLVMInitializeLoongArchTargetMC(); ++ } ++ ++ InstrAnalysisTest() { ++ std::string Error; ++ const Target *TheTarget = ++ TargetRegistry::lookupTarget(Triple::normalize(GetParam()), Error); ++ Info = std::unique_ptr<const MCInstrInfo>(TheTarget->createMCInstrInfo()); ++ Analysis = std::unique_ptr<const MCInstrAnalysis>( ++ TheTarget->createMCInstrAnalysis(Info.get())); ++ } ++}; ++ ++} // namespace ++ ++static MCInst beq() { ++ return MCInstBuilder(LoongArch::BEQ) ++ .addReg(LoongArch::R0) ++ .addReg(LoongArch::R1) ++ .addImm(32); ++} ++ ++static MCInst bl() { return MCInstBuilder(LoongArch::BL).addImm(32); } ++ ++static MCInst jirl(unsigned RD, unsigned RJ = LoongArch::R10) { ++ return MCInstBuilder(LoongArch::JIRL).addReg(RD).addReg(RJ).addImm(16); ++} ++ ++TEST_P(InstrAnalysisTest, IsTerminator) { ++ EXPECT_TRUE(Analysis->isTerminator(beq())); ++ EXPECT_FALSE(Analysis->isTerminator(bl())); ++ EXPECT_TRUE(Analysis->isTerminator(jirl(LoongArch::R0))); ++ EXPECT_FALSE(Analysis->isTerminator(jirl(LoongArch::R5))); ++} ++ ++TEST_P(InstrAnalysisTest, IsCall) { ++ EXPECT_FALSE(Analysis->isCall(beq())); ++ EXPECT_TRUE(Analysis->isCall(bl())); ++ EXPECT_TRUE(Analysis->isCall(jirl(LoongArch::R1))); ++ EXPECT_FALSE(Analysis->isCall(jirl(LoongArch::R0))); ++} ++ ++TEST_P(InstrAnalysisTest, IsReturn) { ++ EXPECT_FALSE(Analysis->isReturn(beq())); ++ EXPECT_FALSE(Analysis->isReturn(bl())); ++ EXPECT_TRUE(Analysis->isReturn(jirl(LoongArch::R0, LoongArch::R1))); ++ EXPECT_FALSE(Analysis->isReturn(jirl(LoongArch::R0))); ++ 
EXPECT_FALSE(Analysis->isReturn(jirl(LoongArch::R1))); ++} ++ ++TEST_P(InstrAnalysisTest, IsBranch) { ++ EXPECT_TRUE(Analysis->isBranch(beq())); ++ EXPECT_FALSE(Analysis->isBranch(bl())); ++ EXPECT_TRUE(Analysis->isBranch(jirl(LoongArch::R0))); ++ EXPECT_FALSE(Analysis->isBranch(jirl(LoongArch::R1))); ++ EXPECT_FALSE(Analysis->isBranch(jirl(LoongArch::R0, LoongArch::R1))); ++} ++ ++TEST_P(InstrAnalysisTest, IsUnconditionalBranch) { ++ EXPECT_FALSE(Analysis->isUnconditionalBranch(beq())); ++ EXPECT_FALSE(Analysis->isUnconditionalBranch(bl())); ++ EXPECT_TRUE(Analysis->isUnconditionalBranch(jirl(LoongArch::R0))); ++ EXPECT_FALSE(Analysis->isUnconditionalBranch(jirl(LoongArch::R1))); ++ EXPECT_FALSE( ++ Analysis->isUnconditionalBranch(jirl(LoongArch::R0, LoongArch::R1))); ++} ++ ++TEST_P(InstrAnalysisTest, IsIndirectBranch) { ++ EXPECT_FALSE(Analysis->isIndirectBranch(beq())); ++ EXPECT_FALSE(Analysis->isIndirectBranch(bl())); ++ EXPECT_TRUE(Analysis->isIndirectBranch(jirl(LoongArch::R0))); ++ EXPECT_FALSE(Analysis->isIndirectBranch(jirl(LoongArch::R1))); ++ EXPECT_FALSE(Analysis->isIndirectBranch(jirl(LoongArch::R0, LoongArch::R1))); ++} ++ ++INSTANTIATE_TEST_SUITE_P(LA32And64, InstrAnalysisTest, ++ testing::Values("loongarch32", "loongarch64")); +-- +2.20.1 + diff --git a/0015-LoongArch-Precommit-test-case-to-show-bug-in-LoongAr.patch b/0015-LoongArch-Precommit-test-case-to-show-bug-in-LoongAr.patch deleted file mode 100644 index dcc3b2d..0000000 --- a/0015-LoongArch-Precommit-test-case-to-show-bug-in-LoongAr.patch +++ /dev/null @@ -1,36 +0,0 @@ -From c9c67d85e77d22d07e68c19eea4a2ef3b1ea364c Mon Sep 17 00:00:00 2001 -From: Weining Lu -Date: Sat, 7 Jun 2025 15:10:24 +0800 -Subject: [PATCH 1/3] [LoongArch] Precommit test case to show bug in - LoongArchISelDagToDag - -The optimization level should not be restored into O2. - -(cherry picked from commit fcc82cfa9394b2bd4380acdcf0e2854caee5a47a) ---- - llvm/test/CodeGen/LoongArch/isel-optnone.ll | 13 +++++++++++++ - 1 file changed, 13 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/isel-optnone.ll - -diff --git a/llvm/test/CodeGen/LoongArch/isel-optnone.ll b/llvm/test/CodeGen/LoongArch/isel-optnone.ll -new file mode 100644 -index 000000000000..d44f1405d0c1 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/isel-optnone.ll -@@ -0,0 +1,13 @@ -+; REQUIRES: asserts -+; RUN: llc %s -O0 -mtriple=loongarch64 -o /dev/null -debug-only=isel 2>&1 | FileCheck %s -+ -+define void @fooOptnone() #0 { -+; CHECK: Changing optimization level for Function fooOptnone -+; CHECK: Before: -O2 ; After: -O0 -+ -+; CHECK: Restoring optimization level for Function fooOptnone -+; CHECK: Before: -O0 ; After: -O2 -+ ret void -+} -+ -+attributes #0 = { nounwind optnone noinline } --- -2.20.1 - diff --git a/0015-LoongArch-clang-Modify-loongarch-msimd.c-to-avoid-gr.patch b/0015-LoongArch-clang-Modify-loongarch-msimd.c-to-avoid-gr.patch new file mode 100644 index 0000000..f325c05 --- /dev/null +++ b/0015-LoongArch-clang-Modify-loongarch-msimd.c-to-avoid-gr.patch @@ -0,0 +1,154 @@ +From dae4195215cbd6c89a86c881aee4c90d8ec933ad Mon Sep 17 00:00:00 2001 +From: Zhaoxin Yang +Date: Thu, 11 Jul 2024 17:43:38 +0800 +Subject: [PATCH 15/23] [LoongArch][clang] Modify `loongarch-msimd.c` to avoid + `grep -o`. 
NFC (#98442) + +Address buildbot failure: +https://lab.llvm.org/buildbot/#/builders/64/builds/250/steps/6/logs/FAIL__Clang__loongarch-msimd_c + +(cherry picked from commit 74b933c28e777fdc04e50f5f96e4f7a4ad1e79a6) +--- + clang/test/Driver/loongarch-msimd.c | 42 +++-------------------------- + 1 file changed, 4 insertions(+), 38 deletions(-) + +diff --git a/clang/test/Driver/loongarch-msimd.c b/clang/test/Driver/loongarch-msimd.c +index 984f3e8bf2bf..cd463300c874 100644 +--- a/clang/test/Driver/loongarch-msimd.c ++++ b/clang/test/Driver/loongarch-msimd.c +@@ -2,128 +2,94 @@ + + /// COM: -msimd=none + // RUN: %clang --target=loongarch64 -mlasx -msimd=none -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=LSX,LASX + // RUN: %clang --target=loongarch64 -mlasx -mlsx -msimd=none -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=LSX,LASX + + // RUN: %clang --target=loongarch64 -msimd=none -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX + // RUN: %clang --target=loongarch64 -mlasx -mno-lasx -msimd=none -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX + // RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mlsx -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX + // RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX + // RUN: %clang --target=loongarch64 -mlsx -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX + // RUN: %clang --target=loongarch64 -mno-lasx -msimd=none -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX + // RUN: %clang --target=loongarch64 -mno-lasx -mlsx -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX + // RUN: %clang --target=loongarch64 -mno-lasx -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX + // RUN: %clang --target=loongarch64 -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX + + // RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mlsx -msimd=none -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=LSX,NOLASX + // RUN: %clang --target=loongarch64 -mno-lasx -mlsx -msimd=none -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=LSX,NOLASX + // RUN: %clang --target=loongarch64 -mlsx -msimd=none -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o 
'"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=LSX,NOLASX + + + /// COM: -msimd=lsx + // RUN: %clang --target=loongarch64 -mlasx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=LSX,LASX + // RUN: %clang --target=loongarch64 -mlasx -mlsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=LSX,LASX + + // RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX + // RUN: %clang --target=loongarch64 -mlsx -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX + // RUN: %clang --target=loongarch64 -mno-lasx -mlsx -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX + // RUN: %clang --target=loongarch64 -mno-lasx -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX + // RUN: %clang --target=loongarch64 -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX + // RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mlsx -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX + + // RUN: %clang --target=loongarch64 -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=LSX,NOLASX + // RUN: %clang --target=loongarch64 -mlasx -mno-lasx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=LSX,NOLASX + // RUN: %clang --target=loongarch64 -mlsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=LSX,NOLASX + // RUN: %clang --target=loongarch64 -mno-lasx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=LSX,NOLASX + // RUN: %clang --target=loongarch64 -mno-lasx -mlsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=LSX,NOLASX + + + /// COM: -msimd=lasx + // RUN: %clang --target=loongarch64 -msimd=lasx -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=LSX,LASX + // RUN: %clang --target=loongarch64 -mlasx -msimd=lasx -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=LSX,LASX + // RUN: %clang --target=loongarch64 -mlasx -mlsx -msimd=lasx -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s 
--check-prefixes=LSX,LASX + // RUN: %clang --target=loongarch64 -mlsx -msimd=lasx -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=LSX,LASX + + // RUN: %clang --target=loongarch64 -mlasx -mno-lasx -msimd=lasx -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX + // RUN: %clang --target=loongarch64 -mno-lasx -msimd=lasx -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX + + // RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mlsx -msimd=lasx -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=LSX,NOLASX + // RUN: %clang --target=loongarch64 -mno-lasx -mlsx -msimd=lasx -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=LSX,NOLASX + // RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mlsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +-// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \ + // RUN: FileCheck %s --check-prefixes=LSX,NOLASX + + +-// LSX: "-target-feature" "+lsx" +-// LASX: "-target-feature" "+lasx" ++// NOLSX-NOT: "-target-feature" "+lsx" ++// NOLASX-NOT: "-target-feature" "+lasx" ++// LSX-DAG: "-target-feature" "+lsx" ++// LASX-DAG: "-target-feature" "+lasx" + // NOLSX-NOT: "-target-feature" "+lsx" + // NOLASX-NOT: "-target-feature" "+lasx" +-- +2.20.1 + diff --git a/0016-Clang-LoongArch-Support-the-builtin-functions-for-LA.patch b/0016-Clang-LoongArch-Support-the-builtin-functions-for-LA.patch new file mode 100644 index 0000000..a3c0967 --- /dev/null +++ b/0016-Clang-LoongArch-Support-the-builtin-functions-for-LA.patch @@ -0,0 +1,5142 @@ +From 55045298ccb1676f5534c94d9cfa35f09b83fd4b Mon Sep 17 00:00:00 2001 +From: licongtian +Date: Wed, 25 Oct 2023 17:44:06 +0800 +Subject: [PATCH 16/42] [Clang][LoongArch] Support the builtin functions for + LASX + +This patch does the following work: +- Define the builtin functions for LASX +- Add the header files lasxintrin.h + +(cherry picked from commit a4005e729c8d9dba9ba19f3ce4ad5b60e64dc467) + +--- + .../include/clang/Basic/BuiltinsLoongArch.def | 3 + + .../clang/Basic/BuiltinsLoongArchLASX.def | 982 +++++ + clang/lib/Headers/CMakeLists.txt | 1 + + clang/lib/Headers/lasxintrin.h | 3860 +++++++++++++++++ + clang/lib/Sema/SemaChecking.cpp | 227 + + 5 files changed, 5073 insertions(+) + create mode 100644 clang/include/clang/Basic/BuiltinsLoongArchLASX.def + create mode 100644 clang/lib/Headers/lasxintrin.h + +diff --git a/clang/include/clang/Basic/BuiltinsLoongArch.def b/clang/include/clang/Basic/BuiltinsLoongArch.def +index 9ec19c31095a..95359a3fdc71 100644 +--- a/clang/include/clang/Basic/BuiltinsLoongArch.def ++++ b/clang/include/clang/Basic/BuiltinsLoongArch.def +@@ -21,5 +21,8 @@ + // Definition of LSX builtins. + #include "clang/Basic/BuiltinsLoongArchLSX.def" + ++// Definition of LASX builtins. 
++#include "clang/Basic/BuiltinsLoongArchLASX.def" ++ + #undef BUILTIN + #undef TARGET_BUILTIN +diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLASX.def b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def +new file mode 100644 +index 000000000000..3de200f665b6 +--- /dev/null ++++ b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def +@@ -0,0 +1,982 @@ ++//=BuiltinsLoongArchLASX.def - LoongArch Builtin function database -- C++ -*-=// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines the LoongArch-specific LASX builtin function database. ++// Users of this file must define the BUILTIN macro to make use of this ++// information. ++// ++//===----------------------------------------------------------------------===// ++ ++TARGET_BUILTIN(__builtin_lasx_xvadd_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvadd_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvadd_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvadd_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvadd_q, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsub_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsub_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsub_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsub_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsub_q, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvaddi_bu, "V32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddi_hu, "V16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddi_wu, "V8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddi_du, "V4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsubi_bu, "V32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubi_hu, "V16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubi_wu, "V8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubi_du, "V4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvneg_b, "V32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvneg_h, "V16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvneg_w, "V8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvneg_d, "V4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsadd_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsadd_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsadd_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsadd_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsadd_bu, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsadd_hu, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsadd_wu, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsadd_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssub_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssub_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssub_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssub_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssub_bu, 
"V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssub_hu, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssub_wu, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssub_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvhaddw_h_b, "V16SsV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhaddw_w_h, "V8SiV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhaddw_d_w, "V4SLLiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhaddw_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvhaddw_hu_bu, "V16UsV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhaddw_wu_hu, "V8UiV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhaddw_du_wu, "V4ULLiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhaddw_qu_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvhsubw_h_b, "V16SsV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhsubw_w_h, "V8SiV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhsubw_d_w, "V4SLLiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhsubw_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvhsubw_hu_bu, "V16UsV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhsubw_wu_hu, "V8UiV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhsubw_du_wu, "V4ULLiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhsubw_qu_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_h_b, "V16sV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_w_h, "V8SiV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_d_w, "V4LLiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_h_b, "V16sV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_w_h, "V8SiV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_d_w, "V4LLiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsubwev_h_b, "V16sV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwev_w_h, "V8SiV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwev_d_w, "V4LLiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwev_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsubwod_h_b, "V16sV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwod_w_h, "V8SiV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwod_d_w, "V4LLiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwod_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_h_bu, "V16sV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_w_hu, "V8SiV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_h_bu, "V16sV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_w_hu, "V8SiV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsubwev_h_bu, "V16sV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwev_w_hu, 
"V8SiV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwev_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwev_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsubwod_h_bu, "V16sV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwod_w_hu, "V8SiV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwod_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwod_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_h_bu_b, "V16sV32UcV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_w_hu_h, "V8SiV16UsV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_h_bu_b, "V16sV32UcV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_w_hu_h, "V8SiV16UsV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvavg_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavg_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavg_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavg_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvavg_bu, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavg_hu, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavg_wu, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavg_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvavgr_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavgr_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavgr_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavgr_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvavgr_bu, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavgr_hu, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavgr_wu, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavgr_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvabsd_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvabsd_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvabsd_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvabsd_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvabsd_bu, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvabsd_hu, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvabsd_wu, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvabsd_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvadda_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvadda_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvadda_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvadda_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmax_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmax_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmax_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmax_d, 
"V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmaxi_b, "V32ScV32ScIi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaxi_h, "V16SsV16SsIi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaxi_w, "V8SiV8SiIi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaxi_d, "V4SLLiV4SLLiIi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmax_bu, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmax_hu, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmax_wu, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmax_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmaxi_bu, "V32UcV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaxi_hu, "V16UsV16UsIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaxi_wu, "V8UiV8UiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaxi_du, "V4ULLiV4ULLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmin_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmin_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmin_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmin_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmini_b, "V32ScV32ScIi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmini_h, "V16SsV16SsIi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmini_w, "V8SiV8SiIi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmini_d, "V4SLLiV4SLLiIi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmin_bu, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmin_hu, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmin_wu, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmin_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmini_bu, "V32UcV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmini_hu, "V16UsV16UsIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmini_wu, "V8UiV8UiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmini_du, "V4ULLiV4ULLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmul_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmul_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmul_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmul_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmuh_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmuh_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmuh_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmuh_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmuh_bu, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmuh_hu, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmuh_wu, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmuh_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_h_b, "V16sV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_w_h, "V8SiV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_d_w, "V4LLiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_h_b, "V16sV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_w_h, "V8SiV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_d_w, "V4LLiV8SiV8Si", "nc", "lasx") 
++TARGET_BUILTIN(__builtin_lasx_xvmulwod_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_h_bu, "V16sV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_w_hu, "V8SiV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_h_bu, "V16sV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_w_hu, "V8SiV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_h_bu_b, "V16sV32UcV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_w_hu_h, "V8SiV16UsV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_h_bu_b, "V16sV32UcV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_w_hu_h, "V8SiV16UsV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmadd_b, "V32ScV32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmadd_h, "V16SsV16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmadd_w, "V8SiV8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmadd_d, "V4SLLiV4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmsub_b, "V32ScV32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmsub_h, "V16SsV16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmsub_w, "V8SiV8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmsub_d, "V4SLLiV4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_h_b, "V16sV16sV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_w_h, "V8SiV8SiV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_d_w, "V4LLiV4LLiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_q_d, "V4LLiV4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_h_b, "V16sV16sV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_w_h, "V8SiV8SiV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_d_w, "V4LLiV4LLiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_q_d, "V4LLiV4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_h_bu, "V16UsV16UsV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_w_hu, "V8UiV8UiV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_d_wu, "V4ULLiV4ULLiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_q_du, "V4ULLiV4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_h_bu, "V16UsV16UsV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_w_hu, "V8UiV8UiV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_d_wu, "V4ULLiV4ULLiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_q_du, "V4ULLiV4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_h_bu_b, "V16sV16sV32UcV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_w_hu_h, "V8SiV8SiV16UsV16s", "nc", 
"lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_d_wu_w, "V4LLiV4LLiV8UiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_q_du_d, "V4LLiV4LLiV4ULLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_h_bu_b, "V16sV16sV32UcV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_w_hu_h, "V8SiV8SiV16UsV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_d_wu_w, "V4LLiV4LLiV8UiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_q_du_d, "V4LLiV4LLiV4ULLiV4LLi", "nc", "lasx") ++ ++ ++TARGET_BUILTIN(__builtin_lasx_xvdiv_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvdiv_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvdiv_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvdiv_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvdiv_bu, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvdiv_hu, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvdiv_wu, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvdiv_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmod_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmod_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmod_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmod_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmod_bu, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmod_hu, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmod_wu, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmod_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsat_b, "V32ScV32ScIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsat_h, "V16SsV16SsIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsat_w, "V8SiV8SiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsat_d, "V4SLLiV4SLLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsat_bu, "V32UcV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsat_hu, "V16UsV16UsIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsat_wu, "V8UiV8UiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsat_du, "V4ULLiV4ULLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvexth_h_b, "V16sV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvexth_w_h, "V8SiV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvexth_d_w, "V4LLiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvexth_q_d, "V4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvexth_hu_bu, "V16UsV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvexth_wu_hu, "V8UiV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvexth_du_wu, "V4ULLiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvexth_qu_du, "V4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_vext2xv_h_b, "V16sV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_vext2xv_w_b, "V8SiV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_vext2xv_d_b, "V4LLiV32c", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_vext2xv_w_h, "V8SiV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_vext2xv_d_h, "V4LLiV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_vext2xv_d_w, "V4LLiV8Si", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_vext2xv_hu_bu, "V16sV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_vext2xv_wu_bu, "V8SiV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_vext2xv_du_bu, "V4LLiV32c", 
"nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_vext2xv_wu_hu, "V8SiV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_vext2xv_du_hu, "V4LLiV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_vext2xv_du_wu, "V4LLiV8Si", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsigncov_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsigncov_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsigncov_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsigncov_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmskltz_b, "V32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmskltz_h, "V16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmskltz_w, "V8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmskltz_d, "V4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmskgez_b, "V32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmsknz_b, "V16sV16s", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvldi, "V4LLiIi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrepli_b, "V32cIi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrepli_h, "V16sIi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrepli_w, "V8iIi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrepli_d, "V4LLiIi", "nc", "lasx") ++ ++ ++TARGET_BUILTIN(__builtin_lasx_xvand_v, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvor_v, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvxor_v, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvnor_v, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvandn_v, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvorn_v, "V32ScV32ScV32Sc", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvandi_b, "V32UcV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvori_b, "V32UcV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvxori_b, "V32UcV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvnori_b, "V32UcV32UcIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsll_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsll_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsll_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsll_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvslli_b, "V32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslli_h, "V16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslli_w, "V8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslli_d, "V4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrl_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrl_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrl_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrl_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrli_b, "V32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrli_h, "V16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrli_w, "V8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrli_d, "V4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsra_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsra_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsra_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsra_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrai_b, "V32cV32cIUi", "nc", 
"lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrai_h, "V16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrai_w, "V8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrai_d, "V4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvrotr_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrotr_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrotr_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrotr_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvrotri_b, "V32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrotri_h, "V16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrotri_w, "V8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrotri_d, "V4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsllwil_h_b, "V16sV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsllwil_w_h, "V8SiV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsllwil_d_w, "V4LLiV8SiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvextl_q_d, "V4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsllwil_hu_bu, "V16UsV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsllwil_wu_hu, "V8UiV16UsIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsllwil_du_wu, "V4ULLiV8UiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvextl_qu_du, "V4LLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrlr_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlr_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlr_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlr_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrlri_b, "V32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlri_h, "V16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlri_w, "V8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlri_d, "V4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrar_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrar_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrar_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrar_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrari_b, "V32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrari_h, "V16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrari_w, "V8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrari_d, "V4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrln_b_h, "V32ScV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrln_h_w, "V16sV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrln_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsran_b_h, "V32ScV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsran_h_w, "V16sV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsran_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrlni_b_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlni_h_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlni_w_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrani_b_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrani_h_w, "V16sV16sV16sIUi", "nc", "lasx") 
++TARGET_BUILTIN(__builtin_lasx_xvsrani_w_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrani_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrlrn_b_h, "V32ScV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlrn_h_w, "V16sV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlrn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrarn_b_h, "V32ScV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrarn_h_w, "V16sV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrarn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrlrni_b_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlrni_h_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlrni_w_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlrni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrarni_b_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrarni_h_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrarni_w_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrarni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrln_b_h, "V32ScV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrln_h_w, "V16sV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrln_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssran_b_h, "V32ScV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssran_h_w, "V16sV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssran_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrln_bu_h, "V32UcV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrln_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrln_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssran_bu_h, "V32UcV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssran_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssran_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrlni_b_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlni_h_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlni_w_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrani_b_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrani_h_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrani_w_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrani_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrlrni_bu_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlrni_hu_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlrni_wu_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlrni_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrani_bu_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrani_hu_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrani_wu_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrani_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrlrn_b_h, "V32ScV16sV16s", 
"nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlrn_h_w, "V16sV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlrn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrarn_b_h, "V32ScV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrarn_h_w, "V16sV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrarn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrlrn_bu_h, "V32UcV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlrn_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlrn_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrarn_bu_h, "V32UcV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrarn_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrarn_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrlrni_b_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlrni_h_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlrni_w_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlrni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrarni_b_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrarni_h_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrarni_w_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrarni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrlni_bu_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlni_hu_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlni_wu_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlni_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrarni_bu_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrarni_hu_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrarni_wu_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrarni_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvclo_b, "V32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvclo_h, "V16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvclo_w, "V8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvclo_d, "V4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvclz_b, "V32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvclz_h, "V16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvclz_w, "V8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvclz_d, "V4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvpcnt_b, "V32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpcnt_h, "V16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpcnt_w, "V8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpcnt_d, "V4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvbitclr_b, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitclr_h, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitclr_w, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitclr_d, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvbitclri_b, "V32UcV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitclri_h, "V16UsV16UsIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitclri_w, "V8UiV8UiIUi", "nc", 
"lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitclri_d, "V4ULLiV4ULLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvbitset_b, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitset_h, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitset_w, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitset_d, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvbitseti_b, "V32UcV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitseti_h, "V16UsV16UsIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitseti_w, "V8UiV8UiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitseti_d, "V4ULLiV4ULLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvbitrev_b, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitrev_h, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitrev_w, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitrev_d, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvbitrevi_b, "V32UcV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitrevi_h, "V16UsV16UsIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitrevi_w, "V8UiV8UiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitrevi_d, "V4ULLiV4ULLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfrstp_b, "V32ScV32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfrstp_h, "V16SsV16SsV16SsV16Ss", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfrstpi_b, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfrstpi_h, "V16sV16sV16sIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfadd_s, "V8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfadd_d, "V4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfsub_s, "V8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfsub_d, "V4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfmul_s, "V8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfmul_d, "V4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfdiv_s, "V8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfdiv_d, "V4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfmadd_s, "V8fV8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfmadd_d, "V4dV4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfmsub_s, "V8fV8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfmsub_d, "V4dV4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfnmadd_s, "V8fV8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfnmadd_d, "V4dV4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfnmsub_s, "V8fV8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfnmsub_d, "V4dV4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfmax_s, "V8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfmax_d, "V4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfmin_s, "V8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfmin_d, "V4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfmaxa_s, "V8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfmaxa_d, "V4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfmina_s, "V8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfmina_d, "V4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvflogb_s, "V8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvflogb_d, "V4dV4d", "nc", "lasx") ++ 
++TARGET_BUILTIN(__builtin_lasx_xvfclass_s, "V8iV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfclass_d, "V4LLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfsqrt_s, "V8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfsqrt_d, "V4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfrecip_s, "V8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfrecip_d, "V4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfrsqrt_s, "V8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfrsqrt_d, "V4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcvtl_s_h, "V8fV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcvth_s_h, "V8fV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcvtl_d_s, "V4dV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcvth_d_s, "V4dV8f", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcvt_h_s, "V16sV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcvt_s_d, "V8fV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfrintrne_s, "V8SiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfrintrne_d, "V4LLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfrintrz_s, "V8SiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfrintrz_d, "V4LLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfrintrp_s, "V8SiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfrintrp_d, "V4LLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfrintrm_s, "V8SiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfrintrm_d, "V4LLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfrint_s, "V8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfrint_d, "V4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftintrne_w_s, "V8SiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrne_l_d, "V4LLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftintrz_w_s, "V8SiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrz_l_d, "V4LLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftintrp_w_s, "V8SiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrp_l_d, "V4LLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftintrm_w_s, "V8SiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrm_l_d, "V4LLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftint_w_s, "V8SiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftint_l_d, "V4SLLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftintrz_wu_s, "V8UiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrz_lu_d, "V4ULLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftint_wu_s, "V8UiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftint_lu_d, "V4ULLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftintrne_w_d, "V8SiV4dV4d", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrz_w_d, "V8SiV4dV4d", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrp_w_d, "V8SiV4dV4d", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrm_w_d, "V8SiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftint_w_d, "V8SiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftintrnel_l_s, "V4LLiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrneh_l_s, "V4LLiV8f", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftintrzl_l_s, "V4LLiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrzh_l_s, "V4LLiV8f", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftintrpl_l_s, "V4LLiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrph_l_s, "V4LLiV8f", 
"nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftintrml_l_s, "V4LLiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrmh_l_s, "V4LLiV8f", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftintl_l_s, "V4LLiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftinth_l_s, "V4LLiV8f", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvffint_s_w, "V8fV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvffint_d_l, "V4dV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvffint_s_wu, "V8fV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvffint_d_lu, "V4dV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvffintl_d_w, "V4dV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvffinth_d_w, "V4dV8Si", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvffint_s_l, "V8fV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvseq_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvseq_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvseq_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvseq_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvseqi_b, "V32ScV32ScISi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvseqi_h, "V16SsV16SsISi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvseqi_w, "V8SiV8SiISi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvseqi_d, "V4SLLiV4SLLiISi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsle_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsle_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsle_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsle_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvslei_b, "V32ScV32ScISi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslei_h, "V16SsV16SsISi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslei_w, "V8SiV8SiISi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslei_d, "V4SLLiV4SLLiISi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsle_bu, "V32ScV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsle_hu, "V16SsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsle_wu, "V8SiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsle_du, "V4SLLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvslei_bu, "V32ScV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslei_hu, "V16SsV16UsIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslei_wu, "V8SiV8UiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslei_du, "V4SLLiV4ULLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvslt_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslt_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslt_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslt_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvslti_b, "V32ScV32ScISi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslti_h, "V16SsV16SsISi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslti_w, "V8SiV8SiISi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslti_d, "V4SLLiV4SLLiISi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvslt_bu, "V32ScV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslt_hu, "V16SsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslt_wu, "V8SiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslt_du, "V4SLLiV4ULLiV4ULLi", "nc", "lasx") ++ 
++TARGET_BUILTIN(__builtin_lasx_xvslti_bu, "V32ScV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslti_hu, "V16SsV16UsIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslti_wu, "V8SiV8UiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslti_du, "V4SLLiV4ULLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_caf_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_caf_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cun_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cun_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_ceq_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_ceq_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cueq_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cueq_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_clt_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_clt_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cult_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cult_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cle_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cle_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cule_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cule_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cne_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cne_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cor_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cor_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cune_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cune_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_saf_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_saf_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sun_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sun_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_seq_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_seq_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sueq_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sueq_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_slt_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_slt_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sult_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sult_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sle_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sle_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sule_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sule_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sne_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sne_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sor_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sor_d, "V4SLLiV4dV4d", "nc", "lasx") ++ 
++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sune_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sune_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvbitsel_v, "V32UcV32UcV32UcV32Uc", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvbitseli_b, "V32UcV32UcV32UcIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_b, "V32Sci", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_h, "V16Ssi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_w, "V8Sii", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_d, "V4SLLiLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvinsgr2vr_w, "V8SiV8SiiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvinsgr2vr_d, "V4SLLiV4SLLiLLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_w, "iV8SiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_d, "LLiV4SLLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_wu, "iV8UiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_du, "LLiV4ULLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvreplve_b, "V32cV32cUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvreplve_h, "V16sV16sUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvreplve_w, "V8iV8iUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvreplve_d, "V4LLiV4LLiUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_b, "V32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_h, "V16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_w, "V8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_d, "V4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvreplve0_b, "V32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvreplve0_h, "V16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvreplve0_w, "V8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvreplve0_d, "V4SLLiV4SLLi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvreplve0_q, "V32ScV32Sc", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvinsve0_w, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvinsve0_d, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvpickve_w, "V8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpickve_d, "V4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvpickve_w_f, "V8fV8fIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpickve_d_f, "V4dV4dIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvbsll_v, "V32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbsrl_v, "V32cV32cIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvpackev_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpackev_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpackev_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpackev_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvpackod_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpackod_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpackod_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpackod_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvpickev_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpickev_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpickev_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpickev_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ 
++TARGET_BUILTIN(__builtin_lasx_xvpickod_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpickod_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpickod_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpickod_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvilvl_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvilvl_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvilvl_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvilvl_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvilvh_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvilvh_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvilvh_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvilvh_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvshuf_b, "V32UcV32UcV32UcV32Uc", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvshuf_h, "V16sV16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvshuf_w, "V8iV8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvshuf_d, "V4LLiV4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvperm_w, "V8iV8iV8i", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvshuf4i_b, "V32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvshuf4i_h, "V16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvshuf4i_w, "V8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvshuf4i_d, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvpermi_w, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpermi_d, "V4LLiV4LLiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpermi_q, "V32cV32cV32cIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvextrins_b, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvextrins_h, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvextrins_w, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvextrins_d, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvld, "V32ScvC*Ii", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvst, "vV32Scv*Ii", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvldx, "V32ScvC*LLi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvstx, "vV32Scv*LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvldrepl_b, "V32cvC*Ii", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvldrepl_h, "V16svC*Ii", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvldrepl_w, "V8ivC*Ii", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvldrepl_d, "V4LLivC*Ii", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvstelm_b, "vV32Scv*IiUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvstelm_h, "vV16Ssv*IiUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvstelm_w, "vV8Siv*IiUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvstelm_d, "vV4SLLiv*IiUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xbz_v, "iV32Uc", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xbnz_v, "iV32Uc", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xbz_b, "iV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xbz_h, "iV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xbz_w, "iV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xbz_d, "iV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xbnz_b, "iV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xbnz_h, "iV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xbnz_w, "iV8Ui", "nc", "lasx") 
++TARGET_BUILTIN(__builtin_lasx_xbnz_d, "iV4ULLi", "nc", "lasx") +diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt +index 1d5573b71e6d..356009ae9157 100644 +--- a/clang/lib/Headers/CMakeLists.txt ++++ b/clang/lib/Headers/CMakeLists.txt +@@ -78,6 +78,7 @@ set(hlsl_files + + set(loongarch_files + larchintrin.h ++ lasxintrin.h + lsxintrin.h + ) + +diff --git a/clang/lib/Headers/lasxintrin.h b/clang/lib/Headers/lasxintrin.h +new file mode 100644 +index 000000000000..6b4d5012a24b +--- /dev/null ++++ b/clang/lib/Headers/lasxintrin.h +@@ -0,0 +1,3860 @@ ++/*===------------ lasxintrin.h - LoongArch LASX intrinsics -----------------=== ++ * ++ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++ * See https://llvm.org/LICENSE.txt for license information. ++ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++ * ++ *===-----------------------------------------------------------------------=== ++ */ ++ ++#ifndef _LOONGSON_ASXINTRIN_H ++#define _LOONGSON_ASXINTRIN_H 1 ++ ++#if defined(__loongarch_asx) ++ ++typedef signed char v32i8 __attribute__((vector_size(32), aligned(32))); ++typedef signed char v32i8_b __attribute__((vector_size(32), aligned(1))); ++typedef unsigned char v32u8 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned char v32u8_b __attribute__((vector_size(32), aligned(1))); ++typedef short v16i16 __attribute__((vector_size(32), aligned(32))); ++typedef short v16i16_h __attribute__((vector_size(32), aligned(2))); ++typedef unsigned short v16u16 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned short v16u16_h __attribute__((vector_size(32), aligned(2))); ++typedef int v8i32 __attribute__((vector_size(32), aligned(32))); ++typedef int v8i32_w __attribute__((vector_size(32), aligned(4))); ++typedef unsigned int v8u32 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned int v8u32_w __attribute__((vector_size(32), aligned(4))); ++typedef long long v4i64 __attribute__((vector_size(32), aligned(32))); ++typedef long long v4i64_d __attribute__((vector_size(32), aligned(8))); ++typedef unsigned long long v4u64 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned long long v4u64_d __attribute__((vector_size(32), aligned(8))); ++typedef float v8f32 __attribute__((vector_size(32), aligned(32))); ++typedef float v8f32_w __attribute__((vector_size(32), aligned(4))); ++typedef double v4f64 __attribute__((vector_size(32), aligned(32))); ++typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); ++ ++typedef double v4f64 __attribute__((vector_size(32), aligned(32))); ++typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); ++ ++typedef float __m256 __attribute__((__vector_size__(32), __may_alias__)); ++typedef long long __m256i __attribute__((__vector_size__(32), __may_alias__)); ++typedef double __m256d __attribute__((__vector_size__(32), __may_alias__)); ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsll_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsll_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsll_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsll_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsll_w(__m256i _1, __m256i _2) { ++ return 
(__m256i)__builtin_lasx_xvsll_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsll_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsll_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvslli_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslli_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvslli_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslli_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvslli_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslli_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvslli_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslli_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsra_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsra_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsra_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsra_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsra_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsra_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsra_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsra_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvsrai_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrai_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvsrai_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrai_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvsrai_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrai_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvsrai_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrai_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrar_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrar_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrar_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrar_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrar_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrar_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrar_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrar_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvsrari_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrari_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvsrari_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrari_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvsrari_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrari_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvsrari_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrari_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrl_b(__m256i _1, __m256i _2) { ++ return 
(__m256i)__builtin_lasx_xvsrl_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrl_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrl_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrl_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrl_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrl_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrl_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvsrli_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrli_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvsrli_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrli_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvsrli_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrli_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvsrli_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrli_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlr_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlr_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlr_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlr_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlr_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlr_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlr_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlr_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvsrlri_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrlri_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvsrlri_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrlri_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvsrlri_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrlri_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvsrlri_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrlri_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitclr_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitclr_b((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitclr_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitclr_h((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitclr_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitclr_w((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitclr_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitclr_d((v4u64)_1, (v4u64)_2); ++} ++ ++#define __lasx_xvbitclri_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitclri_b((v32u8)(_1), (_2))) ++ ++#define __lasx_xvbitclri_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ 
((__m256i)__builtin_lasx_xvbitclri_h((v16u16)(_1), (_2))) ++ ++#define __lasx_xvbitclri_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitclri_w((v8u32)(_1), (_2))) ++ ++#define __lasx_xvbitclri_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitclri_d((v4u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitset_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitset_b((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitset_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitset_h((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitset_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitset_w((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitset_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitset_d((v4u64)_1, (v4u64)_2); ++} ++ ++#define __lasx_xvbitseti_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitseti_b((v32u8)(_1), (_2))) ++ ++#define __lasx_xvbitseti_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitseti_h((v16u16)(_1), (_2))) ++ ++#define __lasx_xvbitseti_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitseti_w((v8u32)(_1), (_2))) ++ ++#define __lasx_xvbitseti_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitseti_d((v4u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitrev_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitrev_b((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitrev_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitrev_h((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitrev_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitrev_w((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitrev_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitrev_d((v4u64)_1, (v4u64)_2); ++} ++ ++#define __lasx_xvbitrevi_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitrevi_b((v32u8)(_1), (_2))) ++ ++#define __lasx_xvbitrevi_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitrevi_h((v16u16)(_1), (_2))) ++ ++#define __lasx_xvbitrevi_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitrevi_w((v8u32)(_1), (_2))) ++ ++#define __lasx_xvbitrevi_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitrevi_d((v4u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i 
++ __lasx_xvadd_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvaddi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvaddi_bu((v32i8)(_1), (_2))) ++ ++#define __lasx_xvaddi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvaddi_hu((v16i16)(_1), (_2))) ++ ++#define __lasx_xvaddi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvaddi_wu((v8i32)(_1), (_2))) ++ ++#define __lasx_xvaddi_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvaddi_du((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvsubi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsubi_bu((v32i8)(_1), (_2))) ++ ++#define __lasx_xvsubi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsubi_hu((v16i16)(_1), (_2))) ++ ++#define __lasx_xvsubi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsubi_wu((v8i32)(_1), (_2))) ++ ++#define __lasx_xvsubi_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsubi_du((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvmaxi_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvmaxi_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvmaxi_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvmaxi_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ 
__lasx_xvmax_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_du((v4u64)_1, (v4u64)_2); ++} ++ ++#define __lasx_xvmaxi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_bu((v32u8)(_1), (_2))) ++ ++#define __lasx_xvmaxi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_hu((v16u16)(_1), (_2))) ++ ++#define __lasx_xvmaxi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_wu((v8u32)(_1), (_2))) ++ ++#define __lasx_xvmaxi_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_du((v4u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvmini_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvmini_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvmini_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvmini_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_du((v4u64)_1, (v4u64)_2); ++} ++ ++#define __lasx_xvmini_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_bu((v32u8)(_1), (_2))) ++ ++#define 
__lasx_xvmini_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_hu((v16u16)(_1), (_2))) ++ ++#define __lasx_xvmini_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_wu((v8u32)(_1), (_2))) ++ ++#define __lasx_xvmini_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_du((v4u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvseq_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvseq_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvseq_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvseq_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvseq_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvseq_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvseq_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvseq_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvseqi_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvseqi_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvseqi_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvseqi_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvseqi_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvseqi_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvseqi_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvseqi_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvslti_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvslti_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvslti_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvslti_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_wu(__m256i _1, __m256i _2) { ++ 
return (__m256i)__builtin_lasx_xvslt_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_du((v4u64)_1, (v4u64)_2); ++} ++ ++#define __lasx_xvslti_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_bu((v32u8)(_1), (_2))) ++ ++#define __lasx_xvslti_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_hu((v16u16)(_1), (_2))) ++ ++#define __lasx_xvslti_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_wu((v8u32)(_1), (_2))) ++ ++#define __lasx_xvslti_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_du((v4u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvslei_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvslei_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvslei_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvslei_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_du((v4u64)_1, (v4u64)_2); ++} ++ ++#define __lasx_xvslei_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_bu((v32u8)(_1), (_2))) ++ ++#define __lasx_xvslei_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_hu((v16u16)(_1), (_2))) ++ ++#define __lasx_xvslei_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_wu((v8u32)(_1), (_2))) ++ ++#define __lasx_xvslei_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_du((v4u64)(_1), (_2))) ++ ++#define __lasx_xvsat_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvsat_h(/*__m256i*/ 
_1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvsat_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvsat_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_d((v4i64)(_1), (_2))) ++ ++#define __lasx_xvsat_bu(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_bu((v32u8)(_1), (_2))) ++ ++#define __lasx_xvsat_hu(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_hu((v16u16)(_1), (_2))) ++ ++#define __lasx_xvsat_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_wu((v8u32)(_1), (_2))) ++ ++#define __lasx_xvsat_du(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_du((v4u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadda_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadda_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadda_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadda_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadda_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadda_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadda_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadda_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_b((v32i8)_1, 
(v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ 
__lasx_xvssub_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmul_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmul_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmul_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmul_h((v16i16)_1, (v16i16)_2); 
++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmul_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmul_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmul_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmul_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmadd_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmadd_b((v32i8)_1, (v32i8)_2, (v32i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmadd_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmadd_h((v16i16)_1, (v16i16)_2, (v16i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmadd_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmadd_w((v8i32)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmadd_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmadd_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsub_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmsub_b((v32i8)_1, (v32i8)_2, (v32i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsub_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmsub_h((v16i16)_1, (v16i16)_2, (v16i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsub_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmsub_w((v8i32)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsub_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmsub_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_hu(__m256i _1, __m256i _2) { ++ return 
(__m256i)__builtin_lasx_xvdiv_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_hu_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_hu_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_wu_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_wu_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_du_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_du_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_hu_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_hu_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_wu_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_wu_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_du_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_du_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_h(__m256i _1, __m256i _2) { ++ return 
(__m256i)__builtin_lasx_xvmod_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_du((v4u64)_1, (v4u64)_2); ++} ++ ++#define __lasx_xvrepl128vei_b(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrepl128vei_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvrepl128vei_h(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrepl128vei_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvrepl128vei_w(/*__m256i*/ _1, /*ui2*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrepl128vei_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvrepl128vei_d(/*__m256i*/ _1, /*ui1*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrepl128vei_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickev_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickev_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickev_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickev_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickev_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickev_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickev_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickev_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickod_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickod_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickod_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickod_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickod_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickod_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickod_d(__m256i _1, __m256i _2) { ++ return 
(__m256i)__builtin_lasx_xvpickod_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvh_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvh_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvh_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvh_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvh_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvh_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvh_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvh_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvl_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvl_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvl_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvl_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvl_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvl_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvl_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvl_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackev_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackev_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackev_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackev_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackev_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackev_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackev_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackev_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackod_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackod_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackod_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackod_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackod_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackod_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackod_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackod_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvshuf_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvshuf_b((v32i8)_1, (v32i8)_2, (v32i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvshuf_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvshuf_h((v16i16)_1, (v16i16)_2, (v16i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvshuf_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvshuf_w((v8i32)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvshuf_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvshuf_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvand_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvand_v((v32u8)_1, (v32u8)_2); ++} ++ ++#define __lasx_xvandi_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvandi_b((v32u8)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvor_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvor_v((v32u8)_1, (v32u8)_2); ++} ++ ++#define __lasx_xvori_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvori_b((v32u8)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvnor_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvnor_v((v32u8)_1, (v32u8)_2); ++} ++ ++#define __lasx_xvnori_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvnori_b((v32u8)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvxor_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvxor_v((v32u8)_1, (v32u8)_2); ++} ++ ++#define __lasx_xvxori_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvxori_b((v32u8)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitsel_v(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvbitsel_v((v32u8)_1, (v32u8)_2, (v32u8)_3); ++} ++ ++#define __lasx_xvbitseli_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvbitseli_b((v32u8)(_1), (v32u8)(_2), (_3))) ++ ++#define __lasx_xvshuf4i_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvshuf4i_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvshuf4i_h(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvshuf4i_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvshuf4i_w(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvshuf4i_w((v8i32)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplgr2vr_b(int _1) { ++ return (__m256i)__builtin_lasx_xvreplgr2vr_b((int)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplgr2vr_h(int _1) { ++ return (__m256i)__builtin_lasx_xvreplgr2vr_h((int)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplgr2vr_w(int _1) { ++ return 
(__m256i)__builtin_lasx_xvreplgr2vr_w((int)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplgr2vr_d(long int _1) { ++ return (__m256i)__builtin_lasx_xvreplgr2vr_d((long int)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpcnt_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvpcnt_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpcnt_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvpcnt_h((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpcnt_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvpcnt_w((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpcnt_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvpcnt_d((v4i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclo_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclo_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclo_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclo_h((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclo_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclo_w((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclo_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclo_d((v4i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclz_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclz_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclz_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclz_h((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclz_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclz_w((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclz_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclz_d((v4i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfadd_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfadd_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfadd_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfadd_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfsub_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfsub_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfsub_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfsub_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmul_s(__m256 _1, __m256 _2) { ++ return 
(__m256)__builtin_lasx_xvfmul_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmul_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmul_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfdiv_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfdiv_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfdiv_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfdiv_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcvt_h_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcvt_h_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfcvt_s_d(__m256d _1, __m256d _2) { ++ return (__m256)__builtin_lasx_xvfcvt_s_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmin_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmin_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmin_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmin_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmina_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmina_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmina_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmina_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmax_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmax_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmax_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmax_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmaxa_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmaxa_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmaxa_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmaxa_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfclass_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvfclass_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfclass_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvfclass_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfsqrt_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfsqrt_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfsqrt_d(__m256d _1) { ++ return 
(__m256d)__builtin_lasx_xvfsqrt_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrecip_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrecip_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrecip_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrecip_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrint_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrint_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrint_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrint_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrsqrt_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrsqrt_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrsqrt_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrsqrt_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvflogb_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvflogb_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvflogb_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvflogb_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfcvth_s_h(__m256i _1) { ++ return (__m256)__builtin_lasx_xvfcvth_s_h((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfcvth_d_s(__m256 _1) { ++ return (__m256d)__builtin_lasx_xvfcvth_d_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfcvtl_s_h(__m256i _1) { ++ return (__m256)__builtin_lasx_xvfcvtl_s_h((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfcvtl_d_s(__m256 _1) { ++ return (__m256d)__builtin_lasx_xvfcvtl_d_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftint_w_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftint_l_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_wu_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftint_wu_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_lu_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftint_lu_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrz_w_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_l_d(__m256d _1) { ++ return 
(__m256i)__builtin_lasx_xvftintrz_l_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_wu_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrz_wu_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_lu_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrz_lu_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvffint_s_w(__m256i _1) { ++ return (__m256)__builtin_lasx_xvffint_s_w((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvffint_d_l(__m256i _1) { ++ return (__m256d)__builtin_lasx_xvffint_d_l((v4i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvffint_s_wu(__m256i _1) { ++ return (__m256)__builtin_lasx_xvffint_s_wu((v8u32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvffint_d_lu(__m256i _1) { ++ return (__m256d)__builtin_lasx_xvffint_d_lu((v4u64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve_b(__m256i _1, int _2) { ++ return (__m256i)__builtin_lasx_xvreplve_b((v32i8)_1, (int)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve_h(__m256i _1, int _2) { ++ return (__m256i)__builtin_lasx_xvreplve_h((v16i16)_1, (int)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve_w(__m256i _1, int _2) { ++ return (__m256i)__builtin_lasx_xvreplve_w((v8i32)_1, (int)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve_d(__m256i _1, int _2) { ++ return (__m256i)__builtin_lasx_xvreplve_d((v4i64)_1, (int)_2); ++} ++ ++#define __lasx_xvpermi_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvpermi_w((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvandn_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvandn_v((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvneg_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvneg_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvneg_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvneg_h((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvneg_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvneg_w((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvneg_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvneg_d((v4i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_h(__m256i _1, 
__m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_du((v4u64)_1, (v4u64)_2); ++} ++ ++#define __lasx_xvsllwil_h_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_h_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvsllwil_w_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_w_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvsllwil_d_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_d_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvsllwil_hu_bu(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_hu_bu((v32u8)(_1), (_2))) ++ ++#define __lasx_xvsllwil_wu_hu(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_wu_hu((v16u16)(_1), (_2))) ++ ++#define __lasx_xvsllwil_du_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_du_wu((v8u32)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsran_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsran_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsran_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsran_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsran_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsran_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ 
__lasx_xvssran_bu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_bu_h((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_hu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_hu_w((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_wu_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_wu_d((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrarn_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrarn_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrarn_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrarn_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrarn_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrarn_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_bu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_bu_h((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_hu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_hu_w((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_wu_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_wu_d((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrln_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrln_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrln_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrln_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrln_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrln_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_bu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_bu_h((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ 
__lasx_xvssrln_hu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_hu_w((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_wu_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_wu_d((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlrn_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlrn_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlrn_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlrn_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlrn_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlrn_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_bu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_bu_h((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_hu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_hu_w((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_wu_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_wu_d((v4u64)_1, (v4u64)_2); ++} ++ ++#define __lasx_xvfrstpi_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvfrstpi_b((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvfrstpi_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvfrstpi_h((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfrstp_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvfrstp_b((v32i8)_1, (v32i8)_2, (v32i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfrstp_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvfrstp_h((v16i16)_1, (v16i16)_2, (v16i16)_3); ++} ++ ++#define __lasx_xvshuf4i_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvshuf4i_d((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvbsrl_v(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbsrl_v((v32i8)(_1), (_2))) ++ ++#define __lasx_xvbsll_v(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbsll_v((v32i8)(_1), (_2))) ++ ++#define __lasx_xvextrins_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvextrins_b((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvextrins_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvextrins_h((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvextrins_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvextrins_w((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvextrins_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvextrins_d((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ 
__lasx_xvmskltz_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskltz_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskltz_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskltz_h((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskltz_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskltz_w((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskltz_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskltz_d((v4i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsigncov_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsigncov_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsigncov_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsigncov_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsigncov_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsigncov_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsigncov_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsigncov_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmadd_s(__m256 _1, __m256 _2, __m256 _3) { ++ return (__m256)__builtin_lasx_xvfmadd_s((v8f32)_1, (v8f32)_2, (v8f32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmadd_d(__m256d _1, __m256d _2, __m256d _3) { ++ return (__m256d)__builtin_lasx_xvfmadd_d((v4f64)_1, (v4f64)_2, (v4f64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmsub_s(__m256 _1, __m256 _2, __m256 _3) { ++ return (__m256)__builtin_lasx_xvfmsub_s((v8f32)_1, (v8f32)_2, (v8f32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmsub_d(__m256d _1, __m256d _2, __m256d _3) { ++ return (__m256d)__builtin_lasx_xvfmsub_d((v4f64)_1, (v4f64)_2, (v4f64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfnmadd_s(__m256 _1, __m256 _2, __m256 _3) { ++ return (__m256)__builtin_lasx_xvfnmadd_s((v8f32)_1, (v8f32)_2, (v8f32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfnmadd_d(__m256d _1, __m256d _2, __m256d _3) { ++ return (__m256d)__builtin_lasx_xvfnmadd_d((v4f64)_1, (v4f64)_2, (v4f64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfnmsub_s(__m256 _1, __m256 _2, __m256 _3) { ++ return (__m256)__builtin_lasx_xvfnmsub_s((v8f32)_1, (v8f32)_2, (v8f32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfnmsub_d(__m256d _1, __m256d _2, __m256d _3) { ++ return (__m256d)__builtin_lasx_xvfnmsub_d((v4f64)_1, (v4f64)_2, (v4f64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
__m256i ++ __lasx_xvftintrne_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrne_w_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrne_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrne_l_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrp_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrp_w_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrp_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrp_l_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrm_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrm_w_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrm_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrm_l_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftint_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvffint_s_l(__m256i _1, __m256i _2) { ++ return (__m256)__builtin_lasx_xvffint_s_l((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftintrz_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrp_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftintrp_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrm_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftintrm_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrne_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftintrne_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftinth_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftinth_l_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintl_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintl_l_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvffinth_d_w(__m256i _1) { ++ return (__m256d)__builtin_lasx_xvffinth_d_w((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvffintl_d_w(__m256i _1) { ++ return (__m256d)__builtin_lasx_xvffintl_d_w((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrzh_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrzh_l_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ 
__lasx_xvftintrzl_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrzl_l_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrph_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrph_l_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrpl_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrpl_l_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrmh_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrmh_l_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrml_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrml_l_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrneh_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrneh_l_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrnel_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrnel_l_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrintrne_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrintrne_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrintrne_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrintrne_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrintrz_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrintrz_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrintrz_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrintrz_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrintrp_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrintrp_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrintrp_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrintrp_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrintrm_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrintrm_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrintrm_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrintrm_d((v4f64)_1); ++} ++ ++#define __lasx_xvld(/*void **/ _1, /*si12*/ _2) \ ++ ((__m256i)__builtin_lasx_xvld((void const *)(_1), (_2))) ++ ++#define __lasx_xvst(/*__m256i*/ _1, /*void **/ _2, /*si12*/ _3) \ ++ ((void)__builtin_lasx_xvst((v32i8)(_1), (void *)(_2), (_3))) ++ ++#define __lasx_xvstelm_b(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ ++ /*idx*/ _4) \ ++ ((void)__builtin_lasx_xvstelm_b((v32i8)(_1), (void *)(_2), (_3), (_4))) ++ ++#define __lasx_xvstelm_h(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ ++ /*idx*/ _4) \ ++ ((void)__builtin_lasx_xvstelm_h((v16i16)(_1), (void *)(_2), (_3), (_4))) ++ ++#define __lasx_xvstelm_w(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ ++ /*idx*/ 
_4) \ ++ ((void)__builtin_lasx_xvstelm_w((v8i32)(_1), (void *)(_2), (_3), (_4))) ++ ++#define __lasx_xvstelm_d(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ ++ /*idx*/ _4) \ ++ ((void)__builtin_lasx_xvstelm_d((v4i64)(_1), (void *)(_2), (_3), (_4))) ++ ++#define __lasx_xvinsve0_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui3*/ _3) \ ++ ((__m256i)__builtin_lasx_xvinsve0_w((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvinsve0_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui2*/ _3) \ ++ ((__m256i)__builtin_lasx_xvinsve0_d((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvpickve_w(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvpickve_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvpickve_d(/*__m256i*/ _1, /*ui2*/ _2) \ ++ ((__m256i)__builtin_lasx_xvpickve_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvorn_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvorn_v((v32i8)_1, (v32i8)_2); ++} ++ ++#define __lasx_xvldi(/*i13*/ _1) ((__m256i)__builtin_lasx_xvldi((_1))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvldx(void const *_1, long int _2) { ++ return (__m256i)__builtin_lasx_xvldx((void const *)_1, (long int)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) void ++ __lasx_xvstx(__m256i _1, void *_2, long int _3) { ++ return (void)__builtin_lasx_xvstx((v32i8)_1, (void *)_2, (long int)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvextl_qu_du(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvextl_qu_du((v4u64)_1); ++} ++ ++#define __lasx_xvinsgr2vr_w(/*__m256i*/ _1, /*int*/ _2, /*ui3*/ _3) \ ++ ((__m256i)__builtin_lasx_xvinsgr2vr_w((v8i32)(_1), (int)(_2), (_3))) ++ ++#define __lasx_xvinsgr2vr_d(/*__m256i*/ _1, /*long int*/ _2, /*ui2*/ _3) \ ++ ((__m256i)__builtin_lasx_xvinsgr2vr_d((v4i64)(_1), (long int)(_2), (_3))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_b((v32i8)_1); ++} 
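++
++/* Editorial usage sketch (not part of the upstream header): each wrapper
++   above merely casts the generic __m256i/__m256/__m256d operands to the
++   lane-typed vectors (v32i8, v16i16, v8i32, v4i64 and their unsigned/float
++   counterparts) expected by the corresponding __builtin_lasx_* builtin,
++   for example:
++
++     __m256i a  = __lasx_xvldx(buf, 0);   // 'buf' is an assumed, valid pointer
++     __m256i lo = __lasx_xvilvl_b(a, a);  // interleave low bytes
++     __m256i hi = __lasx_xvilvh_b(a, a);  // interleave high bytes
++
++   Operations taking an immediate operand (__lasx_xvandi_b, __lasx_xvshuf4i_*,
++   __lasx_xvld, ...) are provided as macros rather than inline functions so
++   that the immediate reaches the builtin as a compile-time constant. */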
++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_h((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_w((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_d((v4i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_q(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_q((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_h_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_h_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_w_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_w_h((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_d_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_d_w((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_w_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_w_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_d_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_d_h((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_d_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_d_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_hu_bu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_hu_bu((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_wu_hu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_wu_hu((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_du_wu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_du_wu((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_wu_bu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_wu_bu((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_du_hu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_du_hu((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_du_bu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_du_bu((v32i8)_1); ++} ++ ++#define __lasx_xvpermi_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvpermi_q((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvpermi_d(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvpermi_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ 
__lasx_xvperm_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvperm_w((v8i32)_1, (v8i32)_2); ++} ++ ++#define __lasx_xvldrepl_b(/*void **/ _1, /*si12*/ _2) \ ++ ((__m256i)__builtin_lasx_xvldrepl_b((void const *)(_1), (_2))) ++ ++#define __lasx_xvldrepl_h(/*void **/ _1, /*si11*/ _2) \ ++ ((__m256i)__builtin_lasx_xvldrepl_h((void const *)(_1), (_2))) ++ ++#define __lasx_xvldrepl_w(/*void **/ _1, /*si10*/ _2) \ ++ ((__m256i)__builtin_lasx_xvldrepl_w((void const *)(_1), (_2))) ++ ++#define __lasx_xvldrepl_d(/*void **/ _1, /*si9*/ _2) \ ++ ((__m256i)__builtin_lasx_xvldrepl_d((void const *)(_1), (_2))) ++ ++#define __lasx_xvpickve2gr_w(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((int)__builtin_lasx_xvpickve2gr_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvpickve2gr_wu(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((unsigned int)__builtin_lasx_xvpickve2gr_wu((v8i32)(_1), (_2))) ++ ++#define __lasx_xvpickve2gr_d(/*__m256i*/ _1, /*ui2*/ _2) \ ++ ((long int)__builtin_lasx_xvpickve2gr_d((v4i64)(_1), (_2))) ++ ++#define __lasx_xvpickve2gr_du(/*__m256i*/ _1, /*ui2*/ _2) \ ++ ((unsigned long int)__builtin_lasx_xvpickve2gr_du((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_w_h(__m256i _1, __m256i _2) { ++ return 
(__m256i)__builtin_lasx_xvsubwev_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_w_h(__m256i _1, 
__m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ 
__lasx_xvmulwod_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_d_wu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_d_wu_w((v8u32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_w_hu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_w_hu_h((v16u16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_h_bu_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_h_bu_b((v32u8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_d_wu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_d_wu_w((v8u32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_w_hu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_w_hu_h((v16u16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_h_bu_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_h_bu_b((v32u8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_d_wu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_d_wu_w((v8u32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_w_hu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_w_hu_h((v16u16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_h_bu_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_h_bu_b((v32u8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_d_wu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_d_wu_w((v8u32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_w_hu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_w_hu_h((v16u16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_h_bu_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_h_bu_b((v32u8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_qu_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_qu_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_qu_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_qu_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_q_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_q_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_d_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_d_w((v4i64)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_w_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_w_h((v8i32)_1, (v16i16)_2, ++ (v16i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_h_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_h_b((v16i16)_1, (v32i8)_2, ++ (v32i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_q_du(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_q_du((v4u64)_1, (v4u64)_2, ++ (v4u64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_d_wu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_d_wu((v4u64)_1, (v8u32)_2, ++ (v8u32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_w_hu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_w_hu((v8u32)_1, (v16u16)_2, ++ (v16u16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_h_bu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_h_bu((v16u16)_1, (v32u8)_2, ++ (v32u8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_q_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_q_d((v4i64)_1, 
(v4i64)_2, (v4i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_d_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_d_w((v4i64)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_w_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_w_h((v8i32)_1, (v16i16)_2, ++ (v16i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_h_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_h_b((v16i16)_1, (v32i8)_2, ++ (v32i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_q_du(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_q_du((v4u64)_1, (v4u64)_2, ++ (v4u64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_d_wu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_d_wu((v4u64)_1, (v8u32)_2, ++ (v8u32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_w_hu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_w_hu((v8u32)_1, (v16u16)_2, ++ (v16u16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_h_bu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_h_bu((v16u16)_1, (v32u8)_2, ++ (v32u8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_q_du_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_q_du_d((v4i64)_1, (v4u64)_2, ++ (v4i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_d_wu_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_d_wu_w((v4i64)_1, (v8u32)_2, ++ (v8i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_w_hu_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_w_hu_h((v8i32)_1, (v16u16)_2, ++ (v16i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_h_bu_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_h_bu_b((v16i16)_1, (v32u8)_2, ++ (v32i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_q_du_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_q_du_d((v4i64)_1, (v4u64)_2, ++ (v4i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_d_wu_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_d_wu_w((v4i64)_1, (v8u32)_2, ++ (v8i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_w_hu_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_w_hu_h((v8i32)_1, 
(v16u16)_2, ++ (v16i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_h_bu_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_h_bu_b((v16i16)_1, (v32u8)_2, ++ (v32i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvrotr_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvrotr_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvrotr_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvrotr_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvrotr_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvrotr_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvrotr_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvrotr_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_q(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_q((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_q(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_q((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_q_du_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_q_du_d((v4u64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_q_du_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_q_du_d((v4u64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_q_du_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_q_du_d((v4u64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_q_du_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_q_du_d((v4u64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskgez_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskgez_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsknz_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmsknz_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_h_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_h_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_w_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_w_h((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_d_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_d_w((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
__m256i ++ __lasx_xvexth_q_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_q_d((v4i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_hu_bu(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_hu_bu((v32u8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_wu_hu(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_wu_hu((v16u16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_du_wu(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_du_wu((v8u32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_qu_du(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_qu_du((v4u64)_1); ++} ++ ++#define __lasx_xvrotri_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrotri_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvrotri_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrotri_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvrotri_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrotri_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvrotri_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrotri_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvextl_q_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvextl_q_d((v4i64)_1); ++} ++ ++#define __lasx_xvsrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvsrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvsrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvsrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvsrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlrni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvsrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlrni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvsrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlrni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvsrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlrni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvssrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvssrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvssrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvssrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvssrlni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_bu_h((v32u8)(_1), (v32i8)(_2), 
(_3))) ++ ++#define __lasx_xvssrlni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvssrlni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvssrlni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvssrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvssrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvssrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvssrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvssrlrni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvssrlrni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvssrlrni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvssrlrni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvsrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrani_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvsrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrani_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvsrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrani_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvsrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrani_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvsrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrarni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvsrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrarni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvsrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrarni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvsrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrarni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvssrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvssrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvssrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvssrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_d_q((v4i64)(_1), (v4i64)(_2), 
(_3))) ++ ++#define __lasx_xvssrani_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvssrani_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvssrani_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvssrani_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_du_q((v4u64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvssrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvssrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvssrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvssrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvssrarni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvssrarni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvssrarni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvssrarni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xbnz_b(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_b((v32u8)(_1))) ++ ++#define __lasx_xbnz_d(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_d((v4u64)(_1))) ++ ++#define __lasx_xbnz_h(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_h((v16u16)(_1))) ++ ++#define __lasx_xbnz_v(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_v((v32u8)(_1))) ++ ++#define __lasx_xbnz_w(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_w((v8u32)(_1))) ++ ++#define __lasx_xbz_b(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_b((v32u8)(_1))) ++ ++#define __lasx_xbz_d(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_d((v4u64)(_1))) ++ ++#define __lasx_xbz_h(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_h((v16u16)(_1))) ++ ++#define __lasx_xbz_v(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_v((v32u8)(_1))) ++ ++#define __lasx_xbz_w(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_w((v8u32)(_1))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_caf_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_caf_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_caf_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_caf_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_ceq_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_ceq_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_ceq_s(__m256 _1, __m256 _2) { ++ return 
(__m256i)__builtin_lasx_xvfcmp_ceq_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cle_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cle_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cle_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cle_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_clt_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_clt_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_clt_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_clt_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cne_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cne_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cne_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cne_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cor_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cor_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cor_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cor_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cueq_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cueq_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cueq_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cueq_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cule_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cule_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cule_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cule_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cult_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cult_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cult_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cult_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cun_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cun_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cune_d(__m256d _1, __m256d _2) { ++ return 
(__m256i)__builtin_lasx_xvfcmp_cune_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cune_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cune_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cun_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cun_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_saf_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_saf_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_saf_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_saf_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_seq_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_seq_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_seq_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_seq_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sle_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sle_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sle_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sle_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_slt_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_slt_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_slt_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_slt_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sne_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sne_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sne_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sne_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sor_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sor_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sor_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sor_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sueq_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sueq_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sueq_s(__m256 _1, __m256 _2) { ++ return 
(__m256i)__builtin_lasx_xvfcmp_sueq_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sule_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sule_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sule_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sule_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sult_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sult_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sult_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sult_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sun_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sun_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sune_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sune_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sune_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sune_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sun_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sun_s((v8f32)_1, (v8f32)_2); ++} ++ ++#define __lasx_xvpickve_d_f(/*__m256d*/ _1, /*ui2*/ _2) \ ++ ((__m256d)__builtin_lasx_xvpickve_d_f((v4f64)(_1), (_2))) ++ ++#define __lasx_xvpickve_w_f(/*__m256*/ _1, /*ui3*/ _2) \ ++ ((__m256)__builtin_lasx_xvpickve_w_f((v8f32)(_1), (_2))) ++ ++#define __lasx_xvrepli_b(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_b((_1))) ++ ++#define __lasx_xvrepli_d(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_d((_1))) ++ ++#define __lasx_xvrepli_h(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_h((_1))) ++ ++#define __lasx_xvrepli_w(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_w((_1))) ++ ++#endif /* defined(__loongarch_asx). */ ++#endif /* _LOONGSON_ASXINTRIN_H. */ +diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp +index d1b015502725..5ee20554c4cf 100644 +--- a/clang/lib/Sema/SemaChecking.cpp ++++ b/clang/lib/Sema/SemaChecking.cpp +@@ -4082,6 +4082,233 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, + case LoongArch::BI__builtin_lsx_vrepli_w: + case LoongArch::BI__builtin_lsx_vrepli_d: + return SemaBuiltinConstantArgRange(TheCall, 0, -512, 511); ++ ++ // LASX intrinsics. 
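For reference, the wrappers declared above are intended to be used from C through the lasxintrin.h header with LASX enabled (e.g. -mlasx); the sketch below is illustrative only and not part of the patch (the function name demo is a placeholder). The immediate forms are macros precisely because their last argument must be an integer constant expression, and the Sema cases that follow reject constants outside the documented ranges.

#include <lasxintrin.h>

/* Hypothetical example: two immediate-form macros plus one plain wrapper
 * from the header above. The immediate arguments must be constants within
 * the ranges enforced by CheckLoongArchBuiltinFunctionCall below. */
__m256i demo(__m256i v) {
  __m256i bias = __lasx_xvrepli_w(5);    /* si10 immediate, accepted range [-512, 511] */
  __m256i rot  = __lasx_xvrotri_w(v, 3); /* ui5 immediate, accepted range [0, 31]      */
  return __lasx_xvadd_q(rot, bias);      /* non-immediate form: both operands are vectors */
}
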
++ case LoongArch::BI__builtin_lasx_xvbitclri_b: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_b: ++ case LoongArch::BI__builtin_lasx_xvbitseti_b: ++ case LoongArch::BI__builtin_lasx_xvsat_b: ++ case LoongArch::BI__builtin_lasx_xvsat_bu: ++ case LoongArch::BI__builtin_lasx_xvslli_b: ++ case LoongArch::BI__builtin_lasx_xvsrai_b: ++ case LoongArch::BI__builtin_lasx_xvsrari_b: ++ case LoongArch::BI__builtin_lasx_xvsrli_b: ++ case LoongArch::BI__builtin_lasx_xvsllwil_h_b: ++ case LoongArch::BI__builtin_lasx_xvsllwil_hu_bu: ++ case LoongArch::BI__builtin_lasx_xvrotri_b: ++ case LoongArch::BI__builtin_lasx_xvsrlri_b: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); ++ case LoongArch::BI__builtin_lasx_xvbitclri_h: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_h: ++ case LoongArch::BI__builtin_lasx_xvbitseti_h: ++ case LoongArch::BI__builtin_lasx_xvsat_h: ++ case LoongArch::BI__builtin_lasx_xvsat_hu: ++ case LoongArch::BI__builtin_lasx_xvslli_h: ++ case LoongArch::BI__builtin_lasx_xvsrai_h: ++ case LoongArch::BI__builtin_lasx_xvsrari_h: ++ case LoongArch::BI__builtin_lasx_xvsrli_h: ++ case LoongArch::BI__builtin_lasx_xvsllwil_w_h: ++ case LoongArch::BI__builtin_lasx_xvsllwil_wu_hu: ++ case LoongArch::BI__builtin_lasx_xvrotri_h: ++ case LoongArch::BI__builtin_lasx_xvsrlri_h: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); ++ case LoongArch::BI__builtin_lasx_xvssrarni_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrarni_bu_h: ++ case LoongArch::BI__builtin_lasx_xvssrani_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrani_bu_h: ++ case LoongArch::BI__builtin_lasx_xvsrarni_b_h: ++ case LoongArch::BI__builtin_lasx_xvsrlni_b_h: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrlni_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrlni_bu_h: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_bu_h: ++ case LoongArch::BI__builtin_lasx_xvsrani_b_h: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 15); ++ case LoongArch::BI__builtin_lasx_xvslei_bu: ++ case LoongArch::BI__builtin_lasx_xvslei_hu: ++ case LoongArch::BI__builtin_lasx_xvslei_wu: ++ case LoongArch::BI__builtin_lasx_xvslei_du: ++ case LoongArch::BI__builtin_lasx_xvslti_bu: ++ case LoongArch::BI__builtin_lasx_xvslti_hu: ++ case LoongArch::BI__builtin_lasx_xvslti_wu: ++ case LoongArch::BI__builtin_lasx_xvslti_du: ++ case LoongArch::BI__builtin_lasx_xvmaxi_bu: ++ case LoongArch::BI__builtin_lasx_xvmaxi_hu: ++ case LoongArch::BI__builtin_lasx_xvmaxi_wu: ++ case LoongArch::BI__builtin_lasx_xvmaxi_du: ++ case LoongArch::BI__builtin_lasx_xvmini_bu: ++ case LoongArch::BI__builtin_lasx_xvmini_hu: ++ case LoongArch::BI__builtin_lasx_xvmini_wu: ++ case LoongArch::BI__builtin_lasx_xvmini_du: ++ case LoongArch::BI__builtin_lasx_xvaddi_bu: ++ case LoongArch::BI__builtin_lasx_xvaddi_hu: ++ case LoongArch::BI__builtin_lasx_xvaddi_wu: ++ case LoongArch::BI__builtin_lasx_xvaddi_du: ++ case LoongArch::BI__builtin_lasx_xvbitclri_w: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_w: ++ case LoongArch::BI__builtin_lasx_xvbitseti_w: ++ case LoongArch::BI__builtin_lasx_xvsat_w: ++ case LoongArch::BI__builtin_lasx_xvsat_wu: ++ case LoongArch::BI__builtin_lasx_xvslli_w: ++ case LoongArch::BI__builtin_lasx_xvsrai_w: ++ case LoongArch::BI__builtin_lasx_xvsrari_w: ++ case LoongArch::BI__builtin_lasx_xvsrli_w: ++ case LoongArch::BI__builtin_lasx_xvsllwil_d_w: ++ case LoongArch::BI__builtin_lasx_xvsllwil_du_wu: ++ case LoongArch::BI__builtin_lasx_xvsrlri_w: ++ case 
LoongArch::BI__builtin_lasx_xvrotri_w: ++ case LoongArch::BI__builtin_lasx_xvsubi_bu: ++ case LoongArch::BI__builtin_lasx_xvsubi_hu: ++ case LoongArch::BI__builtin_lasx_xvsubi_wu: ++ case LoongArch::BI__builtin_lasx_xvsubi_du: ++ case LoongArch::BI__builtin_lasx_xvbsrl_v: ++ case LoongArch::BI__builtin_lasx_xvbsll_v: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 31); ++ case LoongArch::BI__builtin_lasx_xvssrarni_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrarni_hu_w: ++ case LoongArch::BI__builtin_lasx_xvssrani_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrani_hu_w: ++ case LoongArch::BI__builtin_lasx_xvsrarni_h_w: ++ case LoongArch::BI__builtin_lasx_xvsrani_h_w: ++ case LoongArch::BI__builtin_lasx_xvfrstpi_b: ++ case LoongArch::BI__builtin_lasx_xvfrstpi_h: ++ case LoongArch::BI__builtin_lasx_xvsrlni_h_w: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrlni_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrlni_hu_w: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_hu_w: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 31); ++ case LoongArch::BI__builtin_lasx_xvbitclri_d: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_d: ++ case LoongArch::BI__builtin_lasx_xvbitseti_d: ++ case LoongArch::BI__builtin_lasx_xvsat_d: ++ case LoongArch::BI__builtin_lasx_xvsat_du: ++ case LoongArch::BI__builtin_lasx_xvslli_d: ++ case LoongArch::BI__builtin_lasx_xvsrai_d: ++ case LoongArch::BI__builtin_lasx_xvsrli_d: ++ case LoongArch::BI__builtin_lasx_xvsrari_d: ++ case LoongArch::BI__builtin_lasx_xvrotri_d: ++ case LoongArch::BI__builtin_lasx_xvsrlri_d: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 63); ++ case LoongArch::BI__builtin_lasx_xvssrarni_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrarni_wu_d: ++ case LoongArch::BI__builtin_lasx_xvssrani_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrani_wu_d: ++ case LoongArch::BI__builtin_lasx_xvsrarni_w_d: ++ case LoongArch::BI__builtin_lasx_xvsrlni_w_d: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrlni_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrlni_wu_d: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_wu_d: ++ case LoongArch::BI__builtin_lasx_xvsrani_w_d: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 63); ++ case LoongArch::BI__builtin_lasx_xvssrarni_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrarni_du_q: ++ case LoongArch::BI__builtin_lasx_xvssrani_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrani_du_q: ++ case LoongArch::BI__builtin_lasx_xvsrarni_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrlni_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrlni_du_q: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_du_q: ++ case LoongArch::BI__builtin_lasx_xvsrani_d_q: ++ case LoongArch::BI__builtin_lasx_xvsrlni_d_q: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_d_q: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 127); ++ case LoongArch::BI__builtin_lasx_xvseqi_b: ++ case LoongArch::BI__builtin_lasx_xvseqi_h: ++ case LoongArch::BI__builtin_lasx_xvseqi_w: ++ case LoongArch::BI__builtin_lasx_xvseqi_d: ++ case LoongArch::BI__builtin_lasx_xvslti_b: ++ case LoongArch::BI__builtin_lasx_xvslti_h: ++ case LoongArch::BI__builtin_lasx_xvslti_w: ++ case LoongArch::BI__builtin_lasx_xvslti_d: ++ case LoongArch::BI__builtin_lasx_xvslei_b: ++ case LoongArch::BI__builtin_lasx_xvslei_h: ++ case LoongArch::BI__builtin_lasx_xvslei_w: ++ case 
LoongArch::BI__builtin_lasx_xvslei_d: ++ case LoongArch::BI__builtin_lasx_xvmaxi_b: ++ case LoongArch::BI__builtin_lasx_xvmaxi_h: ++ case LoongArch::BI__builtin_lasx_xvmaxi_w: ++ case LoongArch::BI__builtin_lasx_xvmaxi_d: ++ case LoongArch::BI__builtin_lasx_xvmini_b: ++ case LoongArch::BI__builtin_lasx_xvmini_h: ++ case LoongArch::BI__builtin_lasx_xvmini_w: ++ case LoongArch::BI__builtin_lasx_xvmini_d: ++ return SemaBuiltinConstantArgRange(TheCall, 1, -16, 15); ++ case LoongArch::BI__builtin_lasx_xvandi_b: ++ case LoongArch::BI__builtin_lasx_xvnori_b: ++ case LoongArch::BI__builtin_lasx_xvori_b: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_b: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_h: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_w: ++ case LoongArch::BI__builtin_lasx_xvxori_b: ++ case LoongArch::BI__builtin_lasx_xvpermi_d: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 255); ++ case LoongArch::BI__builtin_lasx_xvbitseli_b: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_d: ++ case LoongArch::BI__builtin_lasx_xvextrins_b: ++ case LoongArch::BI__builtin_lasx_xvextrins_h: ++ case LoongArch::BI__builtin_lasx_xvextrins_w: ++ case LoongArch::BI__builtin_lasx_xvextrins_d: ++ case LoongArch::BI__builtin_lasx_xvpermi_q: ++ case LoongArch::BI__builtin_lasx_xvpermi_w: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 255); ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_b: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_h: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_w: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_wu: ++ case LoongArch::BI__builtin_lasx_xvpickve_w_f: ++ case LoongArch::BI__builtin_lasx_xvpickve_w: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); ++ case LoongArch::BI__builtin_lasx_xvinsgr2vr_w: ++ case LoongArch::BI__builtin_lasx_xvinsve0_w: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 7); ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_w: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_d: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_du: ++ case LoongArch::BI__builtin_lasx_xvpickve_d_f: ++ case LoongArch::BI__builtin_lasx_xvpickve_d: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 3); ++ case LoongArch::BI__builtin_lasx_xvinsve0_d: ++ case LoongArch::BI__builtin_lasx_xvinsgr2vr_d: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 3); ++ case LoongArch::BI__builtin_lasx_xvstelm_b: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 31); ++ case LoongArch::BI__builtin_lasx_xvstelm_h: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -256, 254) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 15); ++ case LoongArch::BI__builtin_lasx_xvstelm_w: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -512, 508) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 7); ++ case LoongArch::BI__builtin_lasx_xvstelm_d: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -1024, 1016) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 3); ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_d: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1); ++ case LoongArch::BI__builtin_lasx_xvldrepl_b: ++ case LoongArch::BI__builtin_lasx_xvld: ++ return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2047); ++ case LoongArch::BI__builtin_lasx_xvldrepl_h: ++ return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2046); ++ case LoongArch::BI__builtin_lasx_xvldrepl_w: ++ return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2044); ++ case 
LoongArch::BI__builtin_lasx_xvldrepl_d: ++ return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2040); ++ case LoongArch::BI__builtin_lasx_xvst: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -2048, 2047); ++ case LoongArch::BI__builtin_lasx_xvldi: ++ return SemaBuiltinConstantArgRange(TheCall, 0, -4096, 4095); ++ case LoongArch::BI__builtin_lasx_xvrepli_b: ++ case LoongArch::BI__builtin_lasx_xvrepli_h: ++ case LoongArch::BI__builtin_lasx_xvrepli_w: ++ case LoongArch::BI__builtin_lasx_xvrepli_d: ++ return SemaBuiltinConstantArgRange(TheCall, 0, -512, 511); + } + return false; + } +-- +2.20.1 + diff --git a/0014-Backport-LoongArch-fix-and-add-some-new-support.patch b/0016-LoongArch-CodeGen-Implement-128-bit-and-256-bit-vect.patch similarity index 42% rename from 0014-Backport-LoongArch-fix-and-add-some-new-support.patch rename to 0016-LoongArch-CodeGen-Implement-128-bit-and-256-bit-vect.patch index 9681d0c..8026376 100644 --- a/0014-Backport-LoongArch-fix-and-add-some-new-support.patch +++ b/0016-LoongArch-CodeGen-Implement-128-bit-and-256-bit-vect.patch @@ -1,2055 +1,7 @@ -From 53a624f1fbb2d1f837070b400812e8bddf66fd3d Mon Sep 17 00:00:00 2001 -From: Lu Weining -Date: Tue, 5 Dec 2023 09:20:48 +0800 -Subject: [PATCH 01/12] [BinaryFormat][LoongArch] Define psABI v2.20 relocs for - R_LARCH_CALL36(#73345) - -R_LARCH_CALL36 was designed for function call on medium code model where -the 2 instructions (pcaddu18i + jirl) must be adjacent. - -(cherry picked from commit c3a9c905fbc486add75e16218fe58a04b7b6c282) ---- - llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def | 6 ++++++ - .../tools/llvm-readobj/ELF/reloc-types-loongarch64.test | 2 ++ - llvm/unittests/Object/ELFTest.cpp | 2 ++ - 3 files changed, 10 insertions(+) - -diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def -index 02bce3c71712..c4393432677b 100644 ---- a/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def -+++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def -@@ -118,3 +118,9 @@ ELF_RELOC(R_LARCH_SUB6, 106) - ELF_RELOC(R_LARCH_ADD_ULEB128, 107) - ELF_RELOC(R_LARCH_SUB_ULEB128, 108) - ELF_RELOC(R_LARCH_64_PCREL, 109) -+ -+// Relocs added in ELF for the LoongArchâ„¢ Architecture v20231102, part of the -+// v2.20 LoongArch ABI specs. 
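At the source level, R_LARCH_CALL36 is the relocation an ordinary function call is expected to carry under the medium code model, covering the adjacent pcaddu18i+jirl pair as one unit. The sketch below is not part of the patch; callee/caller are placeholder names and the flags assume something like clang --target=loongarch64-unknown-linux-gnu -mcmodel=medium.

/* Placeholder C example: each call to an external symbol is expected to
 * lower to the pcaddu18i+jirl pair relocated by a single R_LARCH_CALL36. */
extern int callee(int x);

int caller(int x) {
  return callee(x) + 1;   /* call: pcaddu18i $ra, %call36(callee); jirl $ra, $ra, 0 */
}

int caller_tail(int x) {
  return callee(x);       /* tail call (when optimized): pcaddu18i $a1, %call36(callee); jr $a1 */
}
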
-+// -+// Spec addition: https://github.com/loongson/la-abi-specs/pull/4 -+ELF_RELOC(R_LARCH_CALL36, 110) -diff --git a/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test b/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test -index e32dc893fa79..88ff7fa405ed 100644 ---- a/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test -+++ b/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test -@@ -102,6 +102,7 @@ - # CHECK: Type: R_LARCH_ADD_ULEB128 (107) - # CHECK: Type: R_LARCH_SUB_ULEB128 (108) - # CHECK: Type: R_LARCH_64_PCREL (109) -+# CHECK: Type: R_LARCH_CALL36 (110) - - --- !ELF - FileHeader: -@@ -211,3 +212,4 @@ Sections: - - Type: R_LARCH_ADD_ULEB128 - - Type: R_LARCH_SUB_ULEB128 - - Type: R_LARCH_64_PCREL -+ - Type: R_LARCH_CALL36 -diff --git a/llvm/unittests/Object/ELFTest.cpp b/llvm/unittests/Object/ELFTest.cpp -index 50b1df124a4a..ed851dde4c00 100644 ---- a/llvm/unittests/Object/ELFTest.cpp -+++ b/llvm/unittests/Object/ELFTest.cpp -@@ -251,6 +251,8 @@ TEST(ELFTest, getELFRelocationTypeNameForLoongArch) { - getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_SUB_ULEB128)); - EXPECT_EQ("R_LARCH_64_PCREL", - getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_64_PCREL)); -+ EXPECT_EQ("R_LARCH_CALL36", -+ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_CALL36)); - } - - TEST(ELFTest, getELFRelativeRelocationType) { --- -2.20.1 - - -From a8ed0f26220bbacb2c485a392f79ac4b271d73af Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Tue, 2 Jan 2024 10:55:02 +0800 -Subject: [PATCH 02/12] [LoongArch] Emit function call code sequence as - `PCADDU18I+JIRL` in medium code model - -According to the description of the psABI v2.20: -https://github.com/loongson/la-abi-specs/releases/tag/v2.20, adjustments -are made to the function call instructions under the medium code model. - -At the same time, AsmParser has already supported parsing the call36 and -tail36 macro instructions. - -(cherry picked from commit 2cf420d5b846a4733ef0ef7c8ed0ae0bfd1c6772) ---- - .../AsmParser/LoongArchAsmParser.cpp | 61 +++++++++++++++++++ - .../LoongArch/LoongArchExpandPseudoInsts.cpp | 29 ++++----- - .../Target/LoongArch/LoongArchInstrInfo.td | 23 ++++++- - .../Target/LoongArch/LoongArchMCInstLower.cpp | 3 + - .../LoongArch/LoongArchTargetMachine.cpp | 4 +- - .../MCTargetDesc/LoongArchBaseInfo.h | 1 + - .../MCTargetDesc/LoongArchELFObjectWriter.cpp | 2 + - .../MCTargetDesc/LoongArchFixupKinds.h | 3 + - .../MCTargetDesc/LoongArchMCCodeEmitter.cpp | 3 + - .../MCTargetDesc/LoongArchMCExpr.cpp | 3 + - .../LoongArch/MCTargetDesc/LoongArchMCExpr.h | 1 + - llvm/test/CodeGen/LoongArch/code-models.ll | 12 ++-- - .../MC/LoongArch/Basic/Integer/invalid64.s | 2 +- - llvm/test/MC/LoongArch/Macros/macros-call.s | 9 +++ - .../MC/LoongArch/Relocations/relocations.s | 5 ++ - 15 files changed, 133 insertions(+), 28 deletions(-) - create mode 100644 llvm/test/MC/LoongArch/Macros/macros-call.s - -diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp -index a132e645c864..f908e5bc63d3 100644 ---- a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp -+++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp -@@ -122,6 +122,10 @@ class LoongArchAsmParser : public MCTargetAsmParser { - // Helper to emit pseudo instruction "li.w/d $rd, $imm". - void emitLoadImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out); - -+ // Helper to emit pseudo instruction "call36 sym" or "tail36 $rj, sym". 
-+ void emitFuncCall36(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, -+ bool IsTailCall); -+ - public: - enum LoongArchMatchResultTy { - Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY, -@@ -401,6 +405,22 @@ public: - IsValidKind; - } - -+ bool isSImm20pcaddu18i() const { -+ if (!isImm()) -+ return false; -+ -+ int64_t Imm; -+ LoongArchMCExpr::VariantKind VK = LoongArchMCExpr::VK_LoongArch_None; -+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); -+ bool IsValidKind = VK == LoongArchMCExpr::VK_LoongArch_None || -+ VK == LoongArchMCExpr::VK_LoongArch_CALL36; -+ -+ return IsConstantImm -+ ? isInt<20>(Imm) && IsValidKind -+ : LoongArchAsmParser::classifySymbolRef(getImm(), VK) && -+ IsValidKind; -+ } -+ - bool isSImm21lsl2() const { - if (!isImm()) - return false; -@@ -1111,6 +1131,35 @@ void LoongArchAsmParser::emitLoadImm(MCInst &Inst, SMLoc IDLoc, - } - } - -+void LoongArchAsmParser::emitFuncCall36(MCInst &Inst, SMLoc IDLoc, -+ MCStreamer &Out, bool IsTailCall) { -+ // call36 sym -+ // expands to: -+ // pcaddu18i $ra, %call36(sym) -+ // jirl $ra, $ra, 0 -+ // -+ // tail36 $rj, sym -+ // expands to: -+ // pcaddu18i $rj, %call36(sym) -+ // jirl $r0, $rj, 0 -+ unsigned ScratchReg = -+ IsTailCall ? Inst.getOperand(0).getReg() : (unsigned)LoongArch::R1; -+ const MCExpr *Sym = -+ IsTailCall ? Inst.getOperand(1).getExpr() : Inst.getOperand(0).getExpr(); -+ const LoongArchMCExpr *LE = LoongArchMCExpr::create( -+ Sym, llvm::LoongArchMCExpr::VK_LoongArch_CALL36, getContext()); -+ -+ Out.emitInstruction( -+ MCInstBuilder(LoongArch::PCADDU18I).addReg(ScratchReg).addExpr(LE), -+ getSTI()); -+ Out.emitInstruction( -+ MCInstBuilder(LoongArch::JIRL) -+ .addReg(IsTailCall ? (unsigned)LoongArch::R0 : ScratchReg) -+ .addReg(ScratchReg) -+ .addImm(0), -+ getSTI()); -+} -+ - bool LoongArchAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, - OperandVector &Operands, - MCStreamer &Out) { -@@ -1159,6 +1208,12 @@ bool LoongArchAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, - case LoongArch::PseudoLI_D: - emitLoadImm(Inst, IDLoc, Out); - return false; -+ case LoongArch::PseudoCALL36: -+ emitFuncCall36(Inst, IDLoc, Out, /*IsTailCall=*/false); -+ return false; -+ case LoongArch::PseudoTAIL36: -+ emitFuncCall36(Inst, IDLoc, Out, /*IsTailCall=*/true); -+ return false; - } - Out.emitInstruction(Inst, getSTI()); - return false; -@@ -1440,6 +1495,12 @@ bool LoongArchAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - /*Upper=*/(1 << 19) - 1, - "operand must be a symbol with modifier (e.g. %pc_hi20) or an integer " - "in the range"); -+ case Match_InvalidSImm20pcaddu18i: -+ return generateImmOutOfRangeError( -+ Operands, ErrorInfo, /*Lower=*/-(1 << 19), -+ /*Upper=*/(1 << 19) - 1, -+ "operand must be a symbol with modifier (e.g. 
%call36) or an integer " -+ "in the range"); - case Match_InvalidSImm21lsl2: - return generateImmOutOfRangeError( - Operands, ErrorInfo, /*Lower=*/-(1 << 22), /*Upper=*/(1 << 22) - 4, -diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp -index 72c1f1cec198..8eda2dcc1633 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp -@@ -458,11 +458,11 @@ bool LoongArchPreRAExpandPseudo::expandFunctionCALL( - } - case CodeModel::Medium: { - // CALL: -- // pcalau12i $ra, %pc_hi20(func) -- // jirl $ra, $ra, %pc_lo12(func) -+ // pcaddu18i $ra, %call36(func) -+ // jirl $ra, $ra, 0 - // TAIL: -- // pcalau12i $scratch, %pc_hi20(func) -- // jirl $r0, $scratch, %pc_lo12(func) -+ // pcaddu18i $scratch, %call36(func) -+ // jirl $r0, $scratch, 0 - Opcode = - IsTailCall ? LoongArch::PseudoJIRL_TAIL : LoongArch::PseudoJIRL_CALL; - Register ScratchReg = -@@ -470,18 +470,15 @@ bool LoongArchPreRAExpandPseudo::expandFunctionCALL( - ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass) - : LoongArch::R1; - MachineInstrBuilder MIB = -- BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), ScratchReg); -- CALL = BuildMI(MBB, MBBI, DL, TII->get(Opcode)).addReg(ScratchReg); -- if (Func.isSymbol()) { -- const char *FnName = Func.getSymbolName(); -- MIB.addExternalSymbol(FnName, LoongArchII::MO_PCREL_HI); -- CALL.addExternalSymbol(FnName, LoongArchII::MO_PCREL_LO); -- break; -- } -- assert(Func.isGlobal() && "Expected a GlobalValue at this time"); -- const GlobalValue *GV = Func.getGlobal(); -- MIB.addGlobalAddress(GV, 0, LoongArchII::MO_PCREL_HI); -- CALL.addGlobalAddress(GV, 0, LoongArchII::MO_PCREL_LO); -+ BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCADDU18I), ScratchReg); -+ -+ CALL = -+ BuildMI(MBB, MBBI, DL, TII->get(Opcode)).addReg(ScratchReg).addImm(0); -+ -+ if (Func.isSymbol()) -+ MIB.addExternalSymbol(Func.getSymbolName(), LoongArchII::MO_CALL36); -+ else -+ MIB.addDisp(Func, 0, LoongArchII::MO_CALL36); - break; - } - case CodeModel::Large: { -diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -index ab1890556814..67de5f7afd78 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -@@ -351,6 +351,10 @@ def simm20_lu32id : SImm20Operand { - let ParserMatchClass = SImmAsmOperand<20, "lu32id">; - } - -+def simm20_pcaddu18i : SImm20Operand { -+ let ParserMatchClass = SImmAsmOperand<20, "pcaddu18i">; -+} -+ - def simm21_lsl2 : Operand { - let ParserMatchClass = SImmAsmOperand<21, "lsl2">; - let EncoderMethod = "getImmOpValueAsr<2>"; -@@ -772,7 +776,7 @@ def LU32I_D : Fmt1RI20<0x16000000, (outs GPR:$dst), - "$rd, $imm20">; - } - def LU52I_D : ALU_2RI12<0x03000000, simm12_lu52id>; --def PCADDU18I : ALU_1RI20<0x1e000000, simm20>; -+def PCADDU18I : ALU_1RI20<0x1e000000, simm20_pcaddu18i>; - def MUL_D : ALU_3R<0x001d8000>; - def MULH_D : ALU_3R<0x001e0000>; - def MULH_DU : ALU_3R<0x001e8000>; -@@ -1324,7 +1328,7 @@ def : Pat<(brind (add GPR:$rj, simm16_lsl2:$imm16)), - (PseudoBRIND GPR:$rj, simm16_lsl2:$imm16)>; - - let isCall = 1, Defs = [R1] in --def PseudoCALL : Pseudo<(outs), (ins simm26_symbol:$func)>; -+def PseudoCALL : Pseudo<(outs), (ins bare_symbol:$func)>; - - def : Pat<(loongarch_call tglobaladdr:$func), (PseudoCALL tglobaladdr:$func)>; - def : Pat<(loongarch_call texternalsym:$func), (PseudoCALL texternalsym:$func)>; -@@ 
-1344,7 +1348,7 @@ def PseudoRET : Pseudo<(outs), (ins), [(loongarch_ret)]>, - PseudoInstExpansion<(JIRL R0, R1, 0)>; - - let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3] in --def PseudoTAIL : Pseudo<(outs), (ins simm26_symbol:$dst)>; -+def PseudoTAIL : Pseudo<(outs), (ins bare_symbol:$dst)>; - - def : Pat<(loongarch_tail (iPTR tglobaladdr:$dst)), - (PseudoTAIL tglobaladdr:$dst)>; -@@ -1367,6 +1371,19 @@ def PseudoJIRL_TAIL : Pseudo<(outs), (ins GPR:$rj, simm16_lsl2:$imm16)>, - PseudoInstExpansion<(JIRL R0, GPR:$rj, - simm16_lsl2:$imm16)>; - -+/// call36/taill36 macro instructions -+let isCall = 1, isBarrier = 1, isCodeGenOnly = 0, isAsmParserOnly = 1, -+ Defs = [R1], Size = 8, hasSideEffects = 0, mayStore = 0, mayLoad = 0 in -+def PseudoCALL36 : Pseudo<(outs), (ins bare_symbol:$dst), [], -+ "call36", "$dst">, -+ Requires<[IsLA64]>; -+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3], -+ isCodeGenOnly = 0, isAsmParserOnly = 1, Size = 8, hasSideEffects = 0, -+ mayStore = 0, mayLoad = 0 in -+def PseudoTAIL36 : Pseudo<(outs), (ins GPR:$tmp, bare_symbol:$dst), [], -+ "tail36", "$tmp, $dst">, -+ Requires<[IsLA64]>; -+ - /// Load address (la*) macro instructions. - - // Define isCodeGenOnly = 0 to expose them to tablegened assembly parser. -diff --git a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp -index 5daa9481c907..98ad49f25e3f 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp -@@ -95,6 +95,9 @@ static MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym, - case LoongArchII::MO_GD_PC_HI: - Kind = LoongArchMCExpr::VK_LoongArch_TLS_GD_PC_HI20; - break; -+ case LoongArchII::MO_CALL36: -+ Kind = LoongArchMCExpr::VK_LoongArch_CALL36; -+ break; - // TODO: Handle more target-flags. - } - -diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp -index d0a4e9375048..0efc5e6ebb99 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp -@@ -63,11 +63,11 @@ getEffectiveLoongArchCodeModel(const Triple &TT, - - switch (*CM) { - case CodeModel::Small: -- case CodeModel::Medium: - return *CM; -+ case CodeModel::Medium: - case CodeModel::Large: - if (!TT.isArch64Bit()) -- report_fatal_error("Large code model requires LA64"); -+ report_fatal_error("Medium/Large code model requires LA64"); - return *CM; - default: - report_fatal_error( -diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h -index cee6dad1f095..0692cb92b694 100644 ---- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h -+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h -@@ -47,6 +47,7 @@ enum { - MO_IE_PC64_HI, - MO_LD_PC_HI, - MO_GD_PC_HI, -+ MO_CALL36 - // TODO: Add more flags. 
- }; - } // end namespace LoongArchII -diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp -index e60b9c2cfd97..0a52380dd2cd 100644 ---- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp -+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp -@@ -90,6 +90,8 @@ unsigned LoongArchELFObjectWriter::getRelocType(MCContext &Ctx, - return ELF::R_LARCH_TLS_LE64_LO20; - case LoongArch::fixup_loongarch_tls_le64_hi12: - return ELF::R_LARCH_TLS_LE64_HI12; -+ case LoongArch::fixup_loongarch_call36: -+ return ELF::R_LARCH_CALL36; - // TODO: Handle more fixup-kinds. - } - } -diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h -index 78414408f21f..0d19d2b0fb1f 100644 ---- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h -+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h -@@ -111,6 +111,9 @@ enum Fixups { - fixup_loongarch_relax = FirstLiteralRelocationKind + ELF::R_LARCH_RELAX, - // Generate an R_LARCH_ALIGN which indicates the linker may fixup align here. - fixup_loongarch_align = FirstLiteralRelocationKind + ELF::R_LARCH_ALIGN, -+ // 36-bit fixup corresponding to %call36(foo) for a pair instructions: -+ // pcaddu18i+jirl. -+ fixup_loongarch_call36 = FirstLiteralRelocationKind + ELF::R_LARCH_CALL36, - }; - } // end namespace LoongArch - } // end namespace llvm -diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp -index 09d92ac9aa3a..7c4fe9674d4e 100644 ---- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp -+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp -@@ -241,6 +241,9 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO, - case LoongArchMCExpr::VK_LoongArch_TLS_GD_HI20: - FixupKind = LoongArch::fixup_loongarch_tls_gd_hi20; - break; -+ case LoongArchMCExpr::VK_LoongArch_CALL36: -+ FixupKind = LoongArch::fixup_loongarch_call36; -+ break; - } - } else if (Kind == MCExpr::SymbolRef && - cast(Expr)->getKind() == -diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp -index 82c992b1cc8c..8ca8876a19b9 100644 ---- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp -+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp -@@ -138,6 +138,8 @@ StringRef LoongArchMCExpr::getVariantKindName(VariantKind Kind) { - return "gd_pc_hi20"; - case VK_LoongArch_TLS_GD_HI20: - return "gd_hi20"; -+ case VK_LoongArch_CALL36: -+ return "call36"; - } - } - -@@ -180,6 +182,7 @@ LoongArchMCExpr::getVariantKindForName(StringRef name) { - .Case("ld_hi20", VK_LoongArch_TLS_LD_HI20) - .Case("gd_pc_hi20", VK_LoongArch_TLS_GD_PC_HI20) - .Case("gd_hi20", VK_LoongArch_TLS_GD_HI20) -+ .Case("call36", VK_LoongArch_CALL36) - .Default(VK_LoongArch_Invalid); - } - -diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h -index 93251f824103..bd828116d7fa 100644 ---- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h -+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h -@@ -61,6 +61,7 @@ public: - VK_LoongArch_TLS_LD_HI20, - VK_LoongArch_TLS_GD_PC_HI20, - VK_LoongArch_TLS_GD_HI20, -+ VK_LoongArch_CALL36, - VK_LoongArch_Invalid // Must be the last 
item. - }; - -diff --git a/llvm/test/CodeGen/LoongArch/code-models.ll b/llvm/test/CodeGen/LoongArch/code-models.ll -index c610f645a06a..7c6f46d5e926 100644 ---- a/llvm/test/CodeGen/LoongArch/code-models.ll -+++ b/llvm/test/CodeGen/LoongArch/code-models.ll -@@ -23,8 +23,8 @@ define i32 @call_globaladdress(i32 %a) nounwind { - ; MEDIUM: # %bb.0: - ; MEDIUM-NEXT: addi.d $sp, $sp, -16 - ; MEDIUM-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill --; MEDIUM-NEXT: pcalau12i $ra, %pc_hi20(callee) --; MEDIUM-NEXT: jirl $ra, $ra, %pc_lo12(callee) -+; MEDIUM-NEXT: pcaddu18i $ra, %call36(callee) -+; MEDIUM-NEXT: jirl $ra, $ra, 0 - ; MEDIUM-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload - ; MEDIUM-NEXT: addi.d $sp, $sp, 16 - ; MEDIUM-NEXT: ret -@@ -68,8 +68,8 @@ define void @call_external_sym(ptr %dst) { - ; MEDIUM-NEXT: .cfi_offset 1, -8 - ; MEDIUM-NEXT: ori $a2, $zero, 1000 - ; MEDIUM-NEXT: move $a1, $zero --; MEDIUM-NEXT: pcalau12i $ra, %pc_hi20(memset) --; MEDIUM-NEXT: jirl $ra, $ra, %pc_lo12(memset) -+; MEDIUM-NEXT: pcaddu18i $ra, %call36(memset) -+; MEDIUM-NEXT: jirl $ra, $ra, 0 - ; MEDIUM-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload - ; MEDIUM-NEXT: addi.d $sp, $sp, 16 - ; MEDIUM-NEXT: ret -@@ -105,8 +105,8 @@ define i32 @caller_tail(i32 %i) nounwind { - ; - ; MEDIUM-LABEL: caller_tail: - ; MEDIUM: # %bb.0: # %entry --; MEDIUM-NEXT: pcalau12i $a1, %pc_hi20(callee_tail) --; MEDIUM-NEXT: jirl $zero, $a1, %pc_lo12(callee_tail) -+; MEDIUM-NEXT: pcaddu18i $a1, %call36(callee_tail) -+; MEDIUM-NEXT: jr $a1 - ; - ; LARGE-LABEL: caller_tail: - ; LARGE: # %bb.0: # %entry -diff --git a/llvm/test/MC/LoongArch/Basic/Integer/invalid64.s b/llvm/test/MC/LoongArch/Basic/Integer/invalid64.s -index acddca9432a6..1c1c658ad440 100644 ---- a/llvm/test/MC/LoongArch/Basic/Integer/invalid64.s -+++ b/llvm/test/MC/LoongArch/Basic/Integer/invalid64.s -@@ -65,7 +65,7 @@ addu16i.d $a0, $a0, 32768 - - ## simm20 - pcaddu18i $a0, 0x80000 --# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-524288, 524287] -+# CHECK: :[[#@LINE-1]]:16: error: operand must be a symbol with modifier (e.g. %call36) or an integer in the range [-524288, 524287] - - ## simm20_lu32id - lu32i.d $a0, 0x80000 -diff --git a/llvm/test/MC/LoongArch/Macros/macros-call.s b/llvm/test/MC/LoongArch/Macros/macros-call.s -new file mode 100644 -index 000000000000..a648a3978038 ---- /dev/null -+++ b/llvm/test/MC/LoongArch/Macros/macros-call.s -@@ -0,0 +1,9 @@ -+# RUN: llvm-mc --triple=loongarch64 %s | FileCheck %s -+ -+call36 sym_call -+# CHECK: pcaddu18i $ra, %call36(sym_call) -+# CHECK-NEXT: jirl $ra, $ra, 0 -+ -+tail36 $t0, sym_tail -+# CHECK: pcaddu18i $t0, %call36(sym_tail) -+# CHECK-NEXT: jr $t0 -diff --git a/llvm/test/MC/LoongArch/Relocations/relocations.s b/llvm/test/MC/LoongArch/Relocations/relocations.s -index 042cc93470a1..bec71e103893 100644 ---- a/llvm/test/MC/LoongArch/Relocations/relocations.s -+++ b/llvm/test/MC/LoongArch/Relocations/relocations.s -@@ -218,3 +218,8 @@ lu12i.w $t1, %gd_hi20(foo) - # RELOC: R_LARCH_TLS_GD_HI20 foo 0x0 - # INSTR: lu12i.w $t1, %gd_hi20(foo) - # FIXUP: fixup A - offset: 0, value: %gd_hi20(foo), kind: FK_NONE -+ -+pcaddu18i $t1, %call36(foo) -+# RELOC: R_LARCH_CALL36 foo 0x0 -+# INSTR: pcaddu18i $t1, %call36(foo) -+# FIXUP: fixup A - offset: 0, value: %call36(foo), kind: FK_NONE --- -2.20.1 - - -From d59688f326d8f915ffc5db80b40c9b99d9f95470 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Tue, 2 Jan 2024 10:57:40 +0800 -Subject: [PATCH 03/12] [LoongArch] Pre-commit test for #76555. 
NFC - -(cherry picked from commit 3d6fc35b9071009c5ef37f879a12982c6a54db60) ---- - .../LoongArch/psabi-restricted-scheduling.ll | 172 ++++++++++++++++++ - 1 file changed, 172 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll - -diff --git a/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll b/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll -new file mode 100644 -index 000000000000..150a935d7bf8 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll -@@ -0,0 +1,172 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --code-model=medium --post-RA-scheduler=0 < %s \ -+; RUN: | FileCheck %s --check-prefix=MEDIUM_NO_SCH -+; RUN: llc --mtriple=loongarch64 --code-model=medium --post-RA-scheduler=1 < %s \ -+; RUN: | FileCheck %s --check-prefix=MEDIUM_SCH -+; RUN: llc --mtriple=loongarch64 --code-model=large --post-RA-scheduler=0 < %s \ -+; RUN: | FileCheck %s --check-prefix=LARGE_NO_SCH -+; RUN: llc --mtriple=loongarch64 --code-model=large --post-RA-scheduler=1 < %s \ -+; RUN: | FileCheck %s --check-prefix=LARGE_SCH -+ -+;; FIXME: According to the description of the psABI v2.30, the code sequences -+;; of `PseudoLA*_LARGE` instruction and Medium code model's function call must -+;; be adjacent. -+ -+@g = dso_local global i64 zeroinitializer, align 4 -+@G = global i64 zeroinitializer, align 4 -+@gd = external thread_local global i64 -+@ld = external thread_local(localdynamic) global i64 -+@ie = external thread_local(initialexec) global i64 -+ -+declare ptr @bar(i64) -+ -+define void @foo() nounwind { -+; MEDIUM_NO_SCH-LABEL: foo: -+; MEDIUM_NO_SCH: # %bb.0: -+; MEDIUM_NO_SCH-NEXT: addi.d $sp, $sp, -16 -+; MEDIUM_NO_SCH-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -+; MEDIUM_NO_SCH-NEXT: pcalau12i $a0, %got_pc_hi20(G) -+; MEDIUM_NO_SCH-NEXT: ld.d $a0, $a0, %got_pc_lo12(G) -+; MEDIUM_NO_SCH-NEXT: ld.d $a0, $a0, 0 -+; MEDIUM_NO_SCH-NEXT: pcalau12i $a0, %pc_hi20(g) -+; MEDIUM_NO_SCH-NEXT: addi.d $a0, $a0, %pc_lo12(g) -+; MEDIUM_NO_SCH-NEXT: ld.d $a0, $a0, 0 -+; MEDIUM_NO_SCH-NEXT: ori $a0, $zero, 1 -+; MEDIUM_NO_SCH-NEXT: pcaddu18i $ra, %call36(bar) -+; MEDIUM_NO_SCH-NEXT: jirl $ra, $ra, 0 -+; MEDIUM_NO_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(gd) -+; MEDIUM_NO_SCH-NEXT: ld.d $a0, $a0, %ie_pc_lo12(gd) -+; MEDIUM_NO_SCH-NEXT: ldx.d $a0, $a0, $tp -+; MEDIUM_NO_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ld) -+; MEDIUM_NO_SCH-NEXT: ld.d $a0, $a0, %ie_pc_lo12(ld) -+; MEDIUM_NO_SCH-NEXT: ldx.d $a0, $a0, $tp -+; MEDIUM_NO_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ie) -+; MEDIUM_NO_SCH-NEXT: ld.d $a0, $a0, %ie_pc_lo12(ie) -+; MEDIUM_NO_SCH-NEXT: ldx.d $a0, $a0, $tp -+; MEDIUM_NO_SCH-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -+; MEDIUM_NO_SCH-NEXT: addi.d $sp, $sp, 16 -+; MEDIUM_NO_SCH-NEXT: ret -+; -+; MEDIUM_SCH-LABEL: foo: -+; MEDIUM_SCH: # %bb.0: -+; MEDIUM_SCH-NEXT: addi.d $sp, $sp, -16 -+; MEDIUM_SCH-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -+; MEDIUM_SCH-NEXT: pcalau12i $a0, %got_pc_hi20(G) -+; MEDIUM_SCH-NEXT: pcaddu18i $ra, %call36(bar) -+; MEDIUM_SCH-NEXT: ld.d $a0, $a0, %got_pc_lo12(G) -+; MEDIUM_SCH-NEXT: ld.d $a0, $a0, 0 -+; MEDIUM_SCH-NEXT: pcalau12i $a0, %pc_hi20(g) -+; MEDIUM_SCH-NEXT: addi.d $a0, $a0, %pc_lo12(g) -+; MEDIUM_SCH-NEXT: ld.d $a0, $a0, 0 -+; MEDIUM_SCH-NEXT: ori $a0, $zero, 1 -+; MEDIUM_SCH-NEXT: jirl $ra, $ra, 0 -+; MEDIUM_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(gd) -+; MEDIUM_SCH-NEXT: ld.d $a0, $a0, %ie_pc_lo12(gd) 
-+; MEDIUM_SCH-NEXT: ldx.d $a0, $a0, $tp -+; MEDIUM_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ld) -+; MEDIUM_SCH-NEXT: ld.d $a0, $a0, %ie_pc_lo12(ld) -+; MEDIUM_SCH-NEXT: ldx.d $a0, $a0, $tp -+; MEDIUM_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ie) -+; MEDIUM_SCH-NEXT: ld.d $a0, $a0, %ie_pc_lo12(ie) -+; MEDIUM_SCH-NEXT: ldx.d $a0, $a0, $tp -+; MEDIUM_SCH-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -+; MEDIUM_SCH-NEXT: addi.d $sp, $sp, 16 -+; MEDIUM_SCH-NEXT: ret -+; -+; LARGE_NO_SCH-LABEL: foo: -+; LARGE_NO_SCH: # %bb.0: -+; LARGE_NO_SCH-NEXT: addi.d $sp, $sp, -16 -+; LARGE_NO_SCH-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -+; LARGE_NO_SCH-NEXT: pcalau12i $a0, %got_pc_hi20(G) -+; LARGE_NO_SCH-NEXT: addi.d $a1, $zero, %got_pc_lo12(G) -+; LARGE_NO_SCH-NEXT: lu32i.d $a1, %got64_pc_lo20(G) -+; LARGE_NO_SCH-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(G) -+; LARGE_NO_SCH-NEXT: ldx.d $a0, $a1, $a0 -+; LARGE_NO_SCH-NEXT: ld.d $a0, $a0, 0 -+; LARGE_NO_SCH-NEXT: pcalau12i $a0, %pc_hi20(g) -+; LARGE_NO_SCH-NEXT: addi.d $a1, $zero, %pc_lo12(g) -+; LARGE_NO_SCH-NEXT: lu32i.d $a1, %pc64_lo20(g) -+; LARGE_NO_SCH-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g) -+; LARGE_NO_SCH-NEXT: add.d $a0, $a1, $a0 -+; LARGE_NO_SCH-NEXT: ld.d $a0, $a0, 0 -+; LARGE_NO_SCH-NEXT: ori $a0, $zero, 1 -+; LARGE_NO_SCH-NEXT: pcalau12i $a1, %got_pc_hi20(bar) -+; LARGE_NO_SCH-NEXT: addi.d $ra, $zero, %got_pc_lo12(bar) -+; LARGE_NO_SCH-NEXT: lu32i.d $ra, %got64_pc_lo20(bar) -+; LARGE_NO_SCH-NEXT: lu52i.d $ra, $ra, %got64_pc_hi12(bar) -+; LARGE_NO_SCH-NEXT: ldx.d $ra, $ra, $a1 -+; LARGE_NO_SCH-NEXT: jirl $ra, $ra, 0 -+; LARGE_NO_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(gd) -+; LARGE_NO_SCH-NEXT: addi.d $a1, $zero, %ie_pc_lo12(gd) -+; LARGE_NO_SCH-NEXT: lu32i.d $a1, %ie64_pc_lo20(gd) -+; LARGE_NO_SCH-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(gd) -+; LARGE_NO_SCH-NEXT: ldx.d $a0, $a1, $a0 -+; LARGE_NO_SCH-NEXT: ldx.d $a0, $a0, $tp -+; LARGE_NO_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ld) -+; LARGE_NO_SCH-NEXT: addi.d $a1, $zero, %ie_pc_lo12(ld) -+; LARGE_NO_SCH-NEXT: lu32i.d $a1, %ie64_pc_lo20(ld) -+; LARGE_NO_SCH-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(ld) -+; LARGE_NO_SCH-NEXT: ldx.d $a0, $a1, $a0 -+; LARGE_NO_SCH-NEXT: ldx.d $a0, $a0, $tp -+; LARGE_NO_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ie) -+; LARGE_NO_SCH-NEXT: addi.d $a1, $zero, %ie_pc_lo12(ie) -+; LARGE_NO_SCH-NEXT: lu32i.d $a1, %ie64_pc_lo20(ie) -+; LARGE_NO_SCH-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(ie) -+; LARGE_NO_SCH-NEXT: ldx.d $a0, $a1, $a0 -+; LARGE_NO_SCH-NEXT: ldx.d $a0, $a0, $tp -+; LARGE_NO_SCH-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -+; LARGE_NO_SCH-NEXT: addi.d $sp, $sp, 16 -+; LARGE_NO_SCH-NEXT: ret -+; -+; LARGE_SCH-LABEL: foo: -+; LARGE_SCH: # %bb.0: -+; LARGE_SCH-NEXT: addi.d $sp, $sp, -16 -+; LARGE_SCH-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -+; LARGE_SCH-NEXT: addi.d $a1, $zero, %got_pc_lo12(G) -+; LARGE_SCH-NEXT: pcalau12i $a0, %got_pc_hi20(G) -+; LARGE_SCH-NEXT: addi.d $ra, $zero, %got_pc_lo12(bar) -+; LARGE_SCH-NEXT: lu32i.d $a1, %got64_pc_lo20(G) -+; LARGE_SCH-NEXT: lu32i.d $ra, %got64_pc_lo20(bar) -+; LARGE_SCH-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(G) -+; LARGE_SCH-NEXT: lu52i.d $ra, $ra, %got64_pc_hi12(bar) -+; LARGE_SCH-NEXT: ldx.d $a0, $a1, $a0 -+; LARGE_SCH-NEXT: addi.d $a1, $zero, %pc_lo12(g) -+; LARGE_SCH-NEXT: lu32i.d $a1, %pc64_lo20(g) -+; LARGE_SCH-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g) -+; LARGE_SCH-NEXT: ld.d $a0, $a0, 0 -+; LARGE_SCH-NEXT: pcalau12i $a0, %pc_hi20(g) -+; LARGE_SCH-NEXT: add.d $a0, $a1, $a0 -+; LARGE_SCH-NEXT: pcalau12i $a1, %got_pc_hi20(bar) -+; 
LARGE_SCH-NEXT: ld.d $a0, $a0, 0 -+; LARGE_SCH-NEXT: ldx.d $ra, $ra, $a1 -+; LARGE_SCH-NEXT: ori $a0, $zero, 1 -+; LARGE_SCH-NEXT: jirl $ra, $ra, 0 -+; LARGE_SCH-NEXT: addi.d $a1, $zero, %ie_pc_lo12(gd) -+; LARGE_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(gd) -+; LARGE_SCH-NEXT: lu32i.d $a1, %ie64_pc_lo20(gd) -+; LARGE_SCH-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(gd) -+; LARGE_SCH-NEXT: ldx.d $a0, $a1, $a0 -+; LARGE_SCH-NEXT: addi.d $a1, $zero, %ie_pc_lo12(ld) -+; LARGE_SCH-NEXT: lu32i.d $a1, %ie64_pc_lo20(ld) -+; LARGE_SCH-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(ld) -+; LARGE_SCH-NEXT: ldx.d $a0, $a0, $tp -+; LARGE_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ld) -+; LARGE_SCH-NEXT: ldx.d $a0, $a1, $a0 -+; LARGE_SCH-NEXT: addi.d $a1, $zero, %ie_pc_lo12(ie) -+; LARGE_SCH-NEXT: lu32i.d $a1, %ie64_pc_lo20(ie) -+; LARGE_SCH-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(ie) -+; LARGE_SCH-NEXT: ldx.d $a0, $a0, $tp -+; LARGE_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ie) -+; LARGE_SCH-NEXT: ldx.d $a0, $a1, $a0 -+; LARGE_SCH-NEXT: ldx.d $a0, $a0, $tp -+; LARGE_SCH-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -+; LARGE_SCH-NEXT: addi.d $sp, $sp, 16 -+; LARGE_SCH-NEXT: ret -+ %V = load volatile i64, ptr @G -+ %v = load volatile i64, ptr @g -+ call void @bar(i64 1) -+ %v_gd = load volatile i64, ptr @gd -+ %v_ld = load volatile i64, ptr @ld -+ %v_ie = load volatile i64, ptr @ie -+ ret void -+} --- -2.20.1 - - -From 1248440ab618fcffada7fa29eed71bc04945c3ec Mon Sep 17 00:00:00 2001 -From: Weining Lu -Date: Tue, 25 Jun 2024 09:52:17 +0800 -Subject: [PATCH 04/12] [LoongArch][test] Remove the FIXME in - psabi-restricted-scheduling.ll which has been addressed by #76555 - -(cherry picked from commit 7ea63b9db4198688873036f3b0b81f9124076f7a) ---- - llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll | 4 ---- - 1 file changed, 4 deletions(-) - -diff --git a/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll b/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll -index 150a935d7bf8..a515939b9c2b 100644 ---- a/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll -+++ b/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll -@@ -8,10 +8,6 @@ - ; RUN: llc --mtriple=loongarch64 --code-model=large --post-RA-scheduler=1 < %s \ - ; RUN: | FileCheck %s --check-prefix=LARGE_SCH - --;; FIXME: According to the description of the psABI v2.30, the code sequences --;; of `PseudoLA*_LARGE` instruction and Medium code model's function call must --;; be adjacent. -- - @g = dso_local global i64 zeroinitializer, align 4 - @G = global i64 zeroinitializer, align 4 - @gd = external thread_local global i64 --- -2.20.1 - - -From 0e86ae628414dac6d7ef2eaccc8655d790595f9f Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Tue, 2 Jan 2024 10:57:15 +0800 -Subject: [PATCH 05/12] [LoongArch] Reimplement the expansion of - PseudoLA*_LARGE instructions (#76555) - -According to the description of the psABI v2.30: -https://github.com/loongson/la-abi-specs/releases/tag/v2.30, moved the -expansion of relevant pseudo-instructions from -`LoongArchPreRAExpandPseudo` pass to `LoongArchExpandPseudo` pass, to -ensure that the code sequences of `PseudoLA*_LARGE` instructions and -Medium code model's function call are not scheduled. 
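
The adjacency requirement comes from how %call36 is resolved: a single R_LARCH_CALL36 relocation sits on the pcaddu18i and also patches the jirl that is expected to follow it immediately (the FixupKinds comment earlier in this series calls it a "36-bit fixup ... for a pair instructions: pcaddu18i+jirl"), so the two instructions only make sense as a unit. Below is a minimal C++ sketch of that split, assuming the high/low decomposition described by the psABI (imm20 << 18 contributed by pcaddu18i plus imm16 << 2 contributed by jirl); the helper name splitCall36 and the sample offset are illustrative only, not code from this patch series.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Illustrative sketch only: split a 4-byte-aligned, PC-relative byte offset
// into the immediates carried by the adjacent pair
//   pcaddu18i $ra, hi20       ; $ra = PC + (hi20 << 18)
//   jirl      $ra, $ra, lo16  ; jump to $ra + (lo16 << 2)
// Rounding by +0x20000 keeps the remainder within jirl's signed 18-bit reach.
static void splitCall36(int64_t Offset, int &Hi20, int &Lo16) {
  Hi20 = static_cast<int>((Offset + 0x20000) >> 18);
  Lo16 = static_cast<int>((Offset - (static_cast<int64_t>(Hi20) << 18)) >> 2);
}

int main() {
  const int64_t Offset = 0x12345678; // arbitrary 4-byte-aligned example
  int Hi20, Lo16;
  splitCall36(Offset, Hi20, Lo16);
  // The two fields only reproduce the offset when read together, which is why
  // the relocation treats pcaddu18i and the following jirl as one unit.
  int64_t Rebuilt =
      (static_cast<int64_t>(Hi20) << 18) + (static_cast<int64_t>(Lo16) << 2);
  assert(Rebuilt == Offset);
  std::printf("hi20=%d lo16=%d\n", Hi20, Lo16);
  return 0;
}

Keeping that pair (and, analogously, the PseudoLA*_LARGE sequences) out of the scheduler's reach is exactly what moving the expansion from the pre-RA pass into LoongArchExpandPseudo achieves, as the change below describes.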
- -(cherry picked from commit c56a5e895a96fec4292e9333d998cfa88770432a) ---- - .../LoongArch/LoongArchExpandPseudoInsts.cpp | 519 +++++++++--------- - .../LoongArch/LoongArchISelLowering.cpp | 24 +- - .../Target/LoongArch/LoongArchISelLowering.h | 4 + - .../Target/LoongArch/LoongArchInstrInfo.td | 83 ++- - llvm/test/CodeGen/LoongArch/code-models.ll | 36 +- - llvm/test/CodeGen/LoongArch/expand-call.ll | 2 +- - llvm/test/CodeGen/LoongArch/global-address.ll | 32 +- - .../LoongArch/psabi-restricted-scheduling.ll | 102 ++-- - llvm/test/CodeGen/LoongArch/tls-models.ll | 68 +-- - 9 files changed, 487 insertions(+), 383 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp -index 8eda2dcc1633..f977f176066a 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp -@@ -62,43 +62,24 @@ private: - MachineBasicBlock::iterator &NextMBBI, - unsigned FlagsHi, unsigned SecondOpcode, - unsigned FlagsLo); -- bool expandLargeAddressLoad(MachineBasicBlock &MBB, -- MachineBasicBlock::iterator MBBI, -- MachineBasicBlock::iterator &NextMBBI, -- unsigned LastOpcode, unsigned IdentifyingMO); -- bool expandLargeAddressLoad(MachineBasicBlock &MBB, -- MachineBasicBlock::iterator MBBI, -- MachineBasicBlock::iterator &NextMBBI, -- unsigned LastOpcode, unsigned IdentifyingMO, -- const MachineOperand &Symbol, Register DestReg, -- bool EraseFromParent); - bool expandLoadAddressPcrel(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, -- MachineBasicBlock::iterator &NextMBBI, -- bool Large = false); -+ MachineBasicBlock::iterator &NextMBBI); - bool expandLoadAddressGot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, -- MachineBasicBlock::iterator &NextMBBI, -- bool Large = false); -+ MachineBasicBlock::iterator &NextMBBI); - bool expandLoadAddressTLSLE(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI); - bool expandLoadAddressTLSIE(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, -- MachineBasicBlock::iterator &NextMBBI, -- bool Large = false); -+ MachineBasicBlock::iterator &NextMBBI); - bool expandLoadAddressTLSLD(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, -- MachineBasicBlock::iterator &NextMBBI, -- bool Large = false); -+ MachineBasicBlock::iterator &NextMBBI); - bool expandLoadAddressTLSGD(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, -- MachineBasicBlock::iterator &NextMBBI, -- bool Large = false); -- bool expandFunctionCALL(MachineBasicBlock &MBB, -- MachineBasicBlock::iterator MBBI, -- MachineBasicBlock::iterator &NextMBBI, -- bool IsTailCall); -+ MachineBasicBlock::iterator &NextMBBI); - }; - - char LoongArchPreRAExpandPseudo::ID = 0; -@@ -131,30 +112,16 @@ bool LoongArchPreRAExpandPseudo::expandMI( - switch (MBBI->getOpcode()) { - case LoongArch::PseudoLA_PCREL: - return expandLoadAddressPcrel(MBB, MBBI, NextMBBI); -- case LoongArch::PseudoLA_PCREL_LARGE: -- return expandLoadAddressPcrel(MBB, MBBI, NextMBBI, /*Large=*/true); - case LoongArch::PseudoLA_GOT: - return expandLoadAddressGot(MBB, MBBI, NextMBBI); -- case LoongArch::PseudoLA_GOT_LARGE: -- return expandLoadAddressGot(MBB, MBBI, NextMBBI, /*Large=*/true); - case LoongArch::PseudoLA_TLS_LE: - return expandLoadAddressTLSLE(MBB, MBBI, NextMBBI); - case LoongArch::PseudoLA_TLS_IE: - return expandLoadAddressTLSIE(MBB, MBBI, NextMBBI); -- case LoongArch::PseudoLA_TLS_IE_LARGE: -- 
return expandLoadAddressTLSIE(MBB, MBBI, NextMBBI, /*Large=*/true); - case LoongArch::PseudoLA_TLS_LD: - return expandLoadAddressTLSLD(MBB, MBBI, NextMBBI); -- case LoongArch::PseudoLA_TLS_LD_LARGE: -- return expandLoadAddressTLSLD(MBB, MBBI, NextMBBI, /*Large=*/true); - case LoongArch::PseudoLA_TLS_GD: - return expandLoadAddressTLSGD(MBB, MBBI, NextMBBI); -- case LoongArch::PseudoLA_TLS_GD_LARGE: -- return expandLoadAddressTLSGD(MBB, MBBI, NextMBBI, /*Large=*/true); -- case LoongArch::PseudoCALL: -- return expandFunctionCALL(MBB, MBBI, NextMBBI, /*IsTailCall=*/false); -- case LoongArch::PseudoTAIL: -- return expandFunctionCALL(MBB, MBBI, NextMBBI, /*IsTailCall=*/true); - } - return false; - } -@@ -187,118 +154,9 @@ bool LoongArchPreRAExpandPseudo::expandPcalau12iInstPair( - return true; - } - --bool LoongArchPreRAExpandPseudo::expandLargeAddressLoad( -- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, -- MachineBasicBlock::iterator &NextMBBI, unsigned LastOpcode, -- unsigned IdentifyingMO) { -- MachineInstr &MI = *MBBI; -- return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LastOpcode, IdentifyingMO, -- MI.getOperand(2), MI.getOperand(0).getReg(), -- true); --} -- --bool LoongArchPreRAExpandPseudo::expandLargeAddressLoad( -- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, -- MachineBasicBlock::iterator &NextMBBI, unsigned LastOpcode, -- unsigned IdentifyingMO, const MachineOperand &Symbol, Register DestReg, -- bool EraseFromParent) { -- // Code Sequence: -- // -- // Part1: pcalau12i $scratch, %MO1(sym) -- // Part0: addi.d $dest, $zero, %MO0(sym) -- // Part2: lu32i.d $dest, %MO2(sym) -- // Part3: lu52i.d $dest, $dest, %MO3(sym) -- // Fin: LastOpcode $dest, $dest, $scratch -- -- unsigned MO0, MO1, MO2, MO3; -- switch (IdentifyingMO) { -- default: -- llvm_unreachable("unsupported identifying MO"); -- case LoongArchII::MO_PCREL_LO: -- MO0 = IdentifyingMO; -- MO1 = LoongArchII::MO_PCREL_HI; -- MO2 = LoongArchII::MO_PCREL64_LO; -- MO3 = LoongArchII::MO_PCREL64_HI; -- break; -- case LoongArchII::MO_GOT_PC_HI: -- case LoongArchII::MO_LD_PC_HI: -- case LoongArchII::MO_GD_PC_HI: -- // These cases relocate just like the GOT case, except for Part1. -- MO0 = LoongArchII::MO_GOT_PC_LO; -- MO1 = IdentifyingMO; -- MO2 = LoongArchII::MO_GOT_PC64_LO; -- MO3 = LoongArchII::MO_GOT_PC64_HI; -- break; -- case LoongArchII::MO_IE_PC_LO: -- MO0 = IdentifyingMO; -- MO1 = LoongArchII::MO_IE_PC_HI; -- MO2 = LoongArchII::MO_IE_PC64_LO; -- MO3 = LoongArchII::MO_IE_PC64_HI; -- break; -- } -- -- MachineFunction *MF = MBB.getParent(); -- MachineInstr &MI = *MBBI; -- DebugLoc DL = MI.getDebugLoc(); -- -- assert(MF->getSubtarget().is64Bit() && -- "Large code model requires LA64"); -- -- Register TmpPart1 = -- MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass); -- Register TmpPart0 = -- DestReg.isVirtual() -- ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass) -- : DestReg; -- Register TmpParts02 = -- DestReg.isVirtual() -- ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass) -- : DestReg; -- Register TmpParts023 = -- DestReg.isVirtual() -- ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass) -- : DestReg; -- -- auto Part1 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), TmpPart1); -- auto Part0 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::ADDI_D), TmpPart0) -- .addReg(LoongArch::R0); -- auto Part2 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU32I_D), TmpParts02) -- // "rj" is needed due to InstrInfo pattern requirement. 
-- .addReg(TmpPart0, RegState::Kill); -- auto Part3 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU52I_D), TmpParts023) -- .addReg(TmpParts02, RegState::Kill); -- BuildMI(MBB, MBBI, DL, TII->get(LastOpcode), DestReg) -- .addReg(TmpParts023) -- .addReg(TmpPart1, RegState::Kill); -- -- if (Symbol.getType() == MachineOperand::MO_ExternalSymbol) { -- const char *SymName = Symbol.getSymbolName(); -- Part0.addExternalSymbol(SymName, MO0); -- Part1.addExternalSymbol(SymName, MO1); -- Part2.addExternalSymbol(SymName, MO2); -- Part3.addExternalSymbol(SymName, MO3); -- } else { -- Part0.addDisp(Symbol, 0, MO0); -- Part1.addDisp(Symbol, 0, MO1); -- Part2.addDisp(Symbol, 0, MO2); -- Part3.addDisp(Symbol, 0, MO3); -- } -- -- if (EraseFromParent) -- MI.eraseFromParent(); -- -- return true; --} -- - bool LoongArchPreRAExpandPseudo::expandLoadAddressPcrel( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, -- MachineBasicBlock::iterator &NextMBBI, bool Large) { -- if (Large) -- // Emit the 5-insn large address load sequence with the `%pc` family of -- // relocs. -- return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D, -- LoongArchII::MO_PCREL_LO); -- -+ MachineBasicBlock::iterator &NextMBBI) { - // Code Sequence: - // pcalau12i $rd, %pc_hi20(sym) - // addi.w/d $rd, $rd, %pc_lo12(sym) -@@ -311,13 +169,7 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressPcrel( - - bool LoongArchPreRAExpandPseudo::expandLoadAddressGot( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, -- MachineBasicBlock::iterator &NextMBBI, bool Large) { -- if (Large) -- // Emit the 5-insn large address load sequence with the `%got_pc` family -- // of relocs, loading the result from GOT with `ldx.d` in the end. -- return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::LDX_D, -- LoongArchII::MO_GOT_PC_HI); -- -+ MachineBasicBlock::iterator &NextMBBI) { - // Code Sequence: - // pcalau12i $rd, %got_pc_hi20(sym) - // ld.w/d $rd, $rd, %got_pc_lo12(sym) -@@ -378,13 +230,7 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSLE( - - bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSIE( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, -- MachineBasicBlock::iterator &NextMBBI, bool Large) { -- if (Large) -- // Emit the 5-insn large address load sequence with the `%ie_pc` family -- // of relocs, loading the result with `ldx.d` in the end. -- return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::LDX_D, -- LoongArchII::MO_IE_PC_LO); -- -+ MachineBasicBlock::iterator &NextMBBI) { - // Code Sequence: - // pcalau12i $rd, %ie_pc_hi20(sym) - // ld.w/d $rd, $rd, %ie_pc_lo12(sym) -@@ -397,13 +243,7 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSIE( - - bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSLD( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, -- MachineBasicBlock::iterator &NextMBBI, bool Large) { -- if (Large) -- // Emit the 5-insn large address load sequence with the `%got_pc` family -- // of relocs, with the `pcalau12i` insn relocated with `%ld_pc_hi20`. 
-- return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D, -- LoongArchII::MO_LD_PC_HI); -- -+ MachineBasicBlock::iterator &NextMBBI) { - // Code Sequence: - // pcalau12i $rd, %ld_pc_hi20(sym) - // addi.w/d $rd, $rd, %got_pc_lo12(sym) -@@ -416,13 +256,7 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSLD( - - bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSGD( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, -- MachineBasicBlock::iterator &NextMBBI, bool Large) { -- if (Large) -- // Emit the 5-insn large address load sequence with the `%got_pc` family -- // of relocs, with the `pcalau12i` insn relocated with `%gd_pc_hi20`. -- return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D, -- LoongArchII::MO_GD_PC_HI); -- -+ MachineBasicBlock::iterator &NextMBBI) { - // Code Sequence: - // pcalau12i $rd, %gd_pc_hi20(sym) - // addi.w/d $rd, $rd, %got_pc_lo12(sym) -@@ -433,85 +267,6 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSGD( - SecondOpcode, LoongArchII::MO_GOT_PC_LO); - } - --bool LoongArchPreRAExpandPseudo::expandFunctionCALL( -- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, -- MachineBasicBlock::iterator &NextMBBI, bool IsTailCall) { -- MachineFunction *MF = MBB.getParent(); -- MachineInstr &MI = *MBBI; -- DebugLoc DL = MI.getDebugLoc(); -- const MachineOperand &Func = MI.getOperand(0); -- MachineInstrBuilder CALL; -- unsigned Opcode; -- -- switch (MF->getTarget().getCodeModel()) { -- default: -- report_fatal_error("Unsupported code model"); -- break; -- case CodeModel::Small: { -- // CALL: -- // bl func -- // TAIL: -- // b func -- Opcode = IsTailCall ? LoongArch::PseudoB_TAIL : LoongArch::BL; -- CALL = BuildMI(MBB, MBBI, DL, TII->get(Opcode)).add(Func); -- break; -- } -- case CodeModel::Medium: { -- // CALL: -- // pcaddu18i $ra, %call36(func) -- // jirl $ra, $ra, 0 -- // TAIL: -- // pcaddu18i $scratch, %call36(func) -- // jirl $r0, $scratch, 0 -- Opcode = -- IsTailCall ? LoongArch::PseudoJIRL_TAIL : LoongArch::PseudoJIRL_CALL; -- Register ScratchReg = -- IsTailCall -- ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass) -- : LoongArch::R1; -- MachineInstrBuilder MIB = -- BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCADDU18I), ScratchReg); -- -- CALL = -- BuildMI(MBB, MBBI, DL, TII->get(Opcode)).addReg(ScratchReg).addImm(0); -- -- if (Func.isSymbol()) -- MIB.addExternalSymbol(Func.getSymbolName(), LoongArchII::MO_CALL36); -- else -- MIB.addDisp(Func, 0, LoongArchII::MO_CALL36); -- break; -- } -- case CodeModel::Large: { -- // Emit the 5-insn large address load sequence, either directly or -- // indirectly in case of going through the GOT, then JIRL_TAIL or -- // JIRL_CALL to $addr. -- Opcode = -- IsTailCall ? LoongArch::PseudoJIRL_TAIL : LoongArch::PseudoJIRL_CALL; -- Register AddrReg = -- IsTailCall -- ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass) -- : LoongArch::R1; -- -- bool UseGOT = Func.isGlobal() && !Func.getGlobal()->isDSOLocal(); -- unsigned MO = UseGOT ? LoongArchII::MO_GOT_PC_HI : LoongArchII::MO_PCREL_LO; -- unsigned LAOpcode = UseGOT ? LoongArch::LDX_D : LoongArch::ADD_D; -- expandLargeAddressLoad(MBB, MBBI, NextMBBI, LAOpcode, MO, Func, AddrReg, -- false); -- CALL = BuildMI(MBB, MBBI, DL, TII->get(Opcode)).addReg(AddrReg).addImm(0); -- break; -- } -- } -- -- // Transfer implicit operands. -- CALL.copyImplicitOps(MI); -- -- // Transfer MI flags. 
-- CALL.setMIFlags(MI.getFlags()); -- -- MI.eraseFromParent(); -- return true; --} -- - class LoongArchExpandPseudo : public MachineFunctionPass { - public: - const LoongArchInstrInfo *TII; -@@ -533,6 +288,35 @@ private: - MachineBasicBlock::iterator &NextMBBI); - bool expandCopyCFR(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI); -+ bool expandLargeAddressLoad(MachineBasicBlock &MBB, -+ MachineBasicBlock::iterator MBBI, -+ MachineBasicBlock::iterator &NextMBBI, -+ unsigned LastOpcode, unsigned IdentifyingMO); -+ bool expandLargeAddressLoad(MachineBasicBlock &MBB, -+ MachineBasicBlock::iterator MBBI, -+ MachineBasicBlock::iterator &NextMBBI, -+ unsigned LastOpcode, unsigned IdentifyingMO, -+ const MachineOperand &Symbol, Register DestReg, -+ bool EraseFromParent); -+ bool expandLoadAddressPcrelLarge(MachineBasicBlock &MBB, -+ MachineBasicBlock::iterator MBBI, -+ MachineBasicBlock::iterator &NextMBBI); -+ bool expandLoadAddressGotLarge(MachineBasicBlock &MBB, -+ MachineBasicBlock::iterator MBBI, -+ MachineBasicBlock::iterator &NextMBBI); -+ bool expandLoadAddressTLSIELarge(MachineBasicBlock &MBB, -+ MachineBasicBlock::iterator MBBI, -+ MachineBasicBlock::iterator &NextMBBI); -+ bool expandLoadAddressTLSLDLarge(MachineBasicBlock &MBB, -+ MachineBasicBlock::iterator MBBI, -+ MachineBasicBlock::iterator &NextMBBI); -+ bool expandLoadAddressTLSGDLarge(MachineBasicBlock &MBB, -+ MachineBasicBlock::iterator MBBI, -+ MachineBasicBlock::iterator &NextMBBI); -+ bool expandFunctionCALL(MachineBasicBlock &MBB, -+ MachineBasicBlock::iterator MBBI, -+ MachineBasicBlock::iterator &NextMBBI, -+ bool IsTailCall); - }; - - char LoongArchExpandPseudo::ID = 0; -@@ -567,6 +351,24 @@ bool LoongArchExpandPseudo::expandMI(MachineBasicBlock &MBB, - switch (MBBI->getOpcode()) { - case LoongArch::PseudoCopyCFR: - return expandCopyCFR(MBB, MBBI, NextMBBI); -+ case LoongArch::PseudoLA_PCREL_LARGE: -+ return expandLoadAddressPcrelLarge(MBB, MBBI, NextMBBI); -+ case LoongArch::PseudoLA_GOT_LARGE: -+ return expandLoadAddressGotLarge(MBB, MBBI, NextMBBI); -+ case LoongArch::PseudoLA_TLS_IE_LARGE: -+ return expandLoadAddressTLSIELarge(MBB, MBBI, NextMBBI); -+ case LoongArch::PseudoLA_TLS_LD_LARGE: -+ return expandLoadAddressTLSLDLarge(MBB, MBBI, NextMBBI); -+ case LoongArch::PseudoLA_TLS_GD_LARGE: -+ return expandLoadAddressTLSGDLarge(MBB, MBBI, NextMBBI); -+ case LoongArch::PseudoCALL: -+ case LoongArch::PseudoCALL_MEDIUM: -+ case LoongArch::PseudoCALL_LARGE: -+ return expandFunctionCALL(MBB, MBBI, NextMBBI, /*IsTailCall=*/false); -+ case LoongArch::PseudoTAIL: -+ case LoongArch::PseudoTAIL_MEDIUM: -+ case LoongArch::PseudoTAIL_LARGE: -+ return expandFunctionCALL(MBB, MBBI, NextMBBI, /*IsTailCall=*/true); - } - - return false; -@@ -625,6 +427,213 @@ bool LoongArchExpandPseudo::expandCopyCFR( - return true; - } - -+bool LoongArchExpandPseudo::expandLargeAddressLoad( -+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, -+ MachineBasicBlock::iterator &NextMBBI, unsigned LastOpcode, -+ unsigned IdentifyingMO) { -+ MachineInstr &MI = *MBBI; -+ return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LastOpcode, IdentifyingMO, -+ MI.getOperand(2), MI.getOperand(0).getReg(), -+ true); -+} -+ -+bool LoongArchExpandPseudo::expandLargeAddressLoad( -+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, -+ MachineBasicBlock::iterator &NextMBBI, unsigned LastOpcode, -+ unsigned IdentifyingMO, const MachineOperand &Symbol, Register DestReg, -+ bool EraseFromParent) { -+ // Code 
Sequence: -+ // -+ // Part1: pcalau12i $dst, %MO1(sym) -+ // Part0: addi.d $t8, $zero, %MO0(sym) -+ // Part2: lu32i.d $t8, %MO2(sym) -+ // Part3: lu52i.d $t8, $t8, %MO3(sym) -+ // Fin: LastOpcode $dst, $t8, $dst -+ -+ unsigned MO0, MO1, MO2, MO3; -+ switch (IdentifyingMO) { -+ default: -+ llvm_unreachable("unsupported identifying MO"); -+ case LoongArchII::MO_PCREL_LO: -+ MO0 = IdentifyingMO; -+ MO1 = LoongArchII::MO_PCREL_HI; -+ MO2 = LoongArchII::MO_PCREL64_LO; -+ MO3 = LoongArchII::MO_PCREL64_HI; -+ break; -+ case LoongArchII::MO_GOT_PC_HI: -+ case LoongArchII::MO_LD_PC_HI: -+ case LoongArchII::MO_GD_PC_HI: -+ // These cases relocate just like the GOT case, except for Part1. -+ MO0 = LoongArchII::MO_GOT_PC_LO; -+ MO1 = IdentifyingMO; -+ MO2 = LoongArchII::MO_GOT_PC64_LO; -+ MO3 = LoongArchII::MO_GOT_PC64_HI; -+ break; -+ case LoongArchII::MO_IE_PC_LO: -+ MO0 = IdentifyingMO; -+ MO1 = LoongArchII::MO_IE_PC_HI; -+ MO2 = LoongArchII::MO_IE_PC64_LO; -+ MO3 = LoongArchII::MO_IE_PC64_HI; -+ break; -+ } -+ -+ MachineFunction *MF = MBB.getParent(); -+ MachineInstr &MI = *MBBI; -+ DebugLoc DL = MI.getDebugLoc(); -+ Register ScratchReg = LoongArch::R20; // $t8 -+ -+ assert(MF->getSubtarget().is64Bit() && -+ "Large code model requires LA64"); -+ -+ auto Part1 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), DestReg); -+ auto Part0 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::ADDI_D), ScratchReg) -+ .addReg(LoongArch::R0); -+ auto Part2 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU32I_D), ScratchReg) -+ // "rj" is needed due to InstrInfo pattern requirement. -+ .addReg(ScratchReg); -+ auto Part3 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU52I_D), ScratchReg) -+ .addReg(ScratchReg); -+ BuildMI(MBB, MBBI, DL, TII->get(LastOpcode), DestReg) -+ .addReg(ScratchReg) -+ .addReg(DestReg); -+ -+ if (Symbol.getType() == MachineOperand::MO_ExternalSymbol) { -+ const char *SymName = Symbol.getSymbolName(); -+ Part0.addExternalSymbol(SymName, MO0); -+ Part1.addExternalSymbol(SymName, MO1); -+ Part2.addExternalSymbol(SymName, MO2); -+ Part3.addExternalSymbol(SymName, MO3); -+ } else { -+ Part0.addDisp(Symbol, 0, MO0); -+ Part1.addDisp(Symbol, 0, MO1); -+ Part2.addDisp(Symbol, 0, MO2); -+ Part3.addDisp(Symbol, 0, MO3); -+ } -+ -+ if (EraseFromParent) -+ MI.eraseFromParent(); -+ -+ return true; -+} -+ -+bool LoongArchExpandPseudo::expandLoadAddressPcrelLarge( -+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, -+ MachineBasicBlock::iterator &NextMBBI) { -+ // Emit the 5-insn large address load sequence with the `%pc` family of -+ // relocs. -+ return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D, -+ LoongArchII::MO_PCREL_LO); -+} -+ -+bool LoongArchExpandPseudo::expandLoadAddressGotLarge( -+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, -+ MachineBasicBlock::iterator &NextMBBI) { -+ // Emit the 5-insn large address load sequence with the `%got_pc` family -+ // of relocs, loading the result from GOT with `ldx.d` in the end. -+ return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::LDX_D, -+ LoongArchII::MO_GOT_PC_HI); -+} -+ -+bool LoongArchExpandPseudo::expandLoadAddressTLSIELarge( -+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, -+ MachineBasicBlock::iterator &NextMBBI) { -+ // Emit the 5-insn large address load sequence with the `%ie_pc` family -+ // of relocs, loading the result with `ldx.d` in the end. 
-+ return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::LDX_D, -+ LoongArchII::MO_IE_PC_LO); -+} -+ -+bool LoongArchExpandPseudo::expandLoadAddressTLSLDLarge( -+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, -+ MachineBasicBlock::iterator &NextMBBI) { -+ // Emit the 5-insn large address load sequence with the `%got_pc` family -+ // of relocs, with the `pcalau12i` insn relocated with `%ld_pc_hi20`. -+ return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D, -+ LoongArchII::MO_LD_PC_HI); -+} -+ -+bool LoongArchExpandPseudo::expandLoadAddressTLSGDLarge( -+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, -+ MachineBasicBlock::iterator &NextMBBI) { -+ // Emit the 5-insn large address load sequence with the `%got_pc` family -+ // of relocs, with the `pcalau12i` insn relocated with `%gd_pc_hi20`. -+ return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D, -+ LoongArchII::MO_GD_PC_HI); -+} -+ -+bool LoongArchExpandPseudo::expandFunctionCALL( -+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, -+ MachineBasicBlock::iterator &NextMBBI, bool IsTailCall) { -+ MachineFunction *MF = MBB.getParent(); -+ MachineInstr &MI = *MBBI; -+ DebugLoc DL = MI.getDebugLoc(); -+ const MachineOperand &Func = MI.getOperand(0); -+ MachineInstrBuilder CALL; -+ unsigned Opcode; -+ -+ switch (MF->getTarget().getCodeModel()) { -+ default: -+ report_fatal_error("Unsupported code model"); -+ break; -+ case CodeModel::Small: { -+ // CALL: -+ // bl func -+ // TAIL: -+ // b func -+ Opcode = IsTailCall ? LoongArch::PseudoB_TAIL : LoongArch::BL; -+ CALL = BuildMI(MBB, MBBI, DL, TII->get(Opcode)).add(Func); -+ break; -+ } -+ case CodeModel::Medium: { -+ // CALL: -+ // pcaddu18i $ra, %call36(func) -+ // jirl $ra, $ra, 0 -+ // TAIL: -+ // pcaddu18i $t8, %call36(func) -+ // jr $t8 -+ Opcode = -+ IsTailCall ? LoongArch::PseudoJIRL_TAIL : LoongArch::PseudoJIRL_CALL; -+ Register ScratchReg = IsTailCall ? LoongArch::R20 : LoongArch::R1; -+ MachineInstrBuilder MIB = -+ BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCADDU18I), ScratchReg); -+ -+ CALL = -+ BuildMI(MBB, MBBI, DL, TII->get(Opcode)).addReg(ScratchReg).addImm(0); -+ -+ if (Func.isSymbol()) -+ MIB.addExternalSymbol(Func.getSymbolName(), LoongArchII::MO_CALL36); -+ else -+ MIB.addDisp(Func, 0, LoongArchII::MO_CALL36); -+ break; -+ } -+ case CodeModel::Large: { -+ // Emit the 5-insn large address load sequence, either directly or -+ // indirectly in case of going through the GOT, then JIRL_TAIL or -+ // JIRL_CALL to $addr. -+ Opcode = -+ IsTailCall ? LoongArch::PseudoJIRL_TAIL : LoongArch::PseudoJIRL_CALL; -+ Register AddrReg = IsTailCall ? LoongArch::R19 : LoongArch::R1; -+ -+ bool UseGOT = Func.isGlobal() && !Func.getGlobal()->isDSOLocal(); -+ unsigned MO = UseGOT ? LoongArchII::MO_GOT_PC_HI : LoongArchII::MO_PCREL_LO; -+ unsigned LAOpcode = UseGOT ? LoongArch::LDX_D : LoongArch::ADD_D; -+ expandLargeAddressLoad(MBB, MBBI, NextMBBI, LAOpcode, MO, Func, AddrReg, -+ false); -+ CALL = BuildMI(MBB, MBBI, DL, TII->get(Opcode)).addReg(AddrReg).addImm(0); -+ break; -+ } -+ } -+ -+ // Transfer implicit operands. -+ CALL.copyImplicitOps(MI); -+ -+ // Transfer MI flags. 
-+ CALL.setMIFlags(MI.getFlags()); -+ -+ MI.eraseFromParent(); -+ return true; -+} -+ - } // end namespace - - INITIALIZE_PASS(LoongArchPreRAExpandPseudo, "loongarch-prera-expand-pseudo", -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index 4fc2b4709840..df1b17649b7d 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -3389,8 +3389,12 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { - - // TODO: Add more target-dependent nodes later. - NODE_NAME_CASE(CALL) -+ NODE_NAME_CASE(CALL_MEDIUM) -+ NODE_NAME_CASE(CALL_LARGE) - NODE_NAME_CASE(RET) - NODE_NAME_CASE(TAIL) -+ NODE_NAME_CASE(TAIL_MEDIUM) -+ NODE_NAME_CASE(TAIL_LARGE) - NODE_NAME_CASE(SLL_W) - NODE_NAME_CASE(SRA_W) - NODE_NAME_CASE(SRL_W) -@@ -4248,15 +4252,31 @@ LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, - - // Emit the call. - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); -+ unsigned Op; -+ switch (DAG.getTarget().getCodeModel()) { -+ default: -+ report_fatal_error("Unsupported code model"); -+ case CodeModel::Small: -+ Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL; -+ break; -+ case CodeModel::Medium: -+ assert(Subtarget.is64Bit() && "Medium code model requires LA64"); -+ Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM; -+ break; -+ case CodeModel::Large: -+ assert(Subtarget.is64Bit() && "Large code model requires LA64"); -+ Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE; -+ break; -+ } - - if (IsTailCall) { - MF.getFrameInfo().setHasTailCall(); -- SDValue Ret = DAG.getNode(LoongArchISD::TAIL, DL, NodeTys, Ops); -+ SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops); - DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge); - return Ret; - } - -- Chain = DAG.getNode(LoongArchISD::CALL, DL, NodeTys, Ops); -+ Chain = DAG.getNode(Op, DL, NodeTys, Ops); - DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); - Glue = Chain.getValue(1); - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -index 2c9826a13237..a2ed149f4bb7 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -@@ -28,8 +28,12 @@ enum NodeType : unsigned { - - // TODO: add more LoongArchISDs - CALL, -+ CALL_MEDIUM, -+ CALL_LARGE, - RET, - TAIL, -+ TAIL_MEDIUM, -+ TAIL_LARGE, - - // 32-bit shifts, directly matching the semantics of the named LoongArch - // instructions. 
-diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -index 67de5f7afd78..ecd0c2b71b85 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -@@ -69,6 +69,18 @@ def loongarch_ret : SDNode<"LoongArchISD::RET", SDTNone, - def loongarch_tail : SDNode<"LoongArchISD::TAIL", SDT_LoongArchCall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; -+def loongarch_call_medium : SDNode<"LoongArchISD::CALL_MEDIUM", SDT_LoongArchCall, -+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, -+ SDNPVariadic]>; -+def loongarch_tail_medium : SDNode<"LoongArchISD::TAIL_MEDIUM", SDT_LoongArchCall, -+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, -+ SDNPVariadic]>; -+def loongarch_call_large : SDNode<"LoongArchISD::CALL_LARGE", SDT_LoongArchCall, -+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, -+ SDNPVariadic]>; -+def loongarch_tail_large : SDNode<"LoongArchISD::TAIL_LARGE", SDT_LoongArchCall, -+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, -+ SDNPVariadic]>; - def loongarch_sll_w : SDNode<"LoongArchISD::SLL_W", SDT_LoongArchIntBinOpW>; - def loongarch_sra_w : SDNode<"LoongArchISD::SRA_W", SDT_LoongArchIntBinOpW>; - def loongarch_srl_w : SDNode<"LoongArchISD::SRL_W", SDT_LoongArchIntBinOpW>; -@@ -1327,16 +1339,43 @@ def : Pat<(brind GPR:$rj), (PseudoBRIND GPR:$rj, 0)>; - def : Pat<(brind (add GPR:$rj, simm16_lsl2:$imm16)), - (PseudoBRIND GPR:$rj, simm16_lsl2:$imm16)>; - -+// Function call with 'Small' code model. - let isCall = 1, Defs = [R1] in - def PseudoCALL : Pseudo<(outs), (ins bare_symbol:$func)>; - - def : Pat<(loongarch_call tglobaladdr:$func), (PseudoCALL tglobaladdr:$func)>; - def : Pat<(loongarch_call texternalsym:$func), (PseudoCALL texternalsym:$func)>; - -+// Function call with 'Medium' code model. -+let isCall = 1, Defs = [R1, R20], Size = 8 in -+def PseudoCALL_MEDIUM : Pseudo<(outs), (ins bare_symbol:$func)>; -+ -+let Predicates = [IsLA64] in { -+def : Pat<(loongarch_call_medium tglobaladdr:$func), -+ (PseudoCALL_MEDIUM tglobaladdr:$func)>; -+def : Pat<(loongarch_call_medium texternalsym:$func), -+ (PseudoCALL_MEDIUM texternalsym:$func)>; -+} // Predicates = [IsLA64] -+ -+// Function call with 'Large' code model. -+let isCall = 1, Defs = [R1, R20], Size = 24 in -+def PseudoCALL_LARGE: Pseudo<(outs), (ins bare_symbol:$func)>; -+ -+let Predicates = [IsLA64] in { -+def : Pat<(loongarch_call_large tglobaladdr:$func), -+ (PseudoCALL_LARGE tglobaladdr:$func)>; -+def : Pat<(loongarch_call_large texternalsym:$func), -+ (PseudoCALL_LARGE texternalsym:$func)>; -+} // Predicates = [IsLA64] -+ - let isCall = 1, Defs = [R1] in - def PseudoCALLIndirect : Pseudo<(outs), (ins GPR:$rj), - [(loongarch_call GPR:$rj)]>, - PseudoInstExpansion<(JIRL R1, GPR:$rj, 0)>; -+let Predicates = [IsLA64] in { -+def : Pat<(loongarch_call_medium GPR:$rj), (PseudoCALLIndirect GPR:$rj)>; -+def : Pat<(loongarch_call_large GPR:$rj), (PseudoCALLIndirect GPR:$rj)>; -+} - - let isCall = 1, hasSideEffects = 0, mayStore = 0, mayLoad = 0, Defs = [R1] in - def PseudoJIRL_CALL : Pseudo<(outs), (ins GPR:$rj, simm16_lsl2:$imm16)>, -@@ -1347,6 +1386,7 @@ let isBarrier = 1, isReturn = 1, isTerminator = 1 in - def PseudoRET : Pseudo<(outs), (ins), [(loongarch_ret)]>, - PseudoInstExpansion<(JIRL R0, R1, 0)>; - -+// Tail call with 'Small' code model. 
- let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3] in - def PseudoTAIL : Pseudo<(outs), (ins bare_symbol:$dst)>; - -@@ -1355,10 +1395,38 @@ def : Pat<(loongarch_tail (iPTR tglobaladdr:$dst)), - def : Pat<(loongarch_tail (iPTR texternalsym:$dst)), - (PseudoTAIL texternalsym:$dst)>; - -+// Tail call with 'Medium' code model. -+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, -+ Uses = [R3], Defs = [R20], Size = 8 in -+def PseudoTAIL_MEDIUM : Pseudo<(outs), (ins bare_symbol:$dst)>; -+ -+let Predicates = [IsLA64] in { -+def : Pat<(loongarch_tail_medium (iPTR tglobaladdr:$dst)), -+ (PseudoTAIL_MEDIUM tglobaladdr:$dst)>; -+def : Pat<(loongarch_tail_medium (iPTR texternalsym:$dst)), -+ (PseudoTAIL_MEDIUM texternalsym:$dst)>; -+} // Predicates = [IsLA64] -+ -+// Tail call with 'Large' code model. -+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, -+ Uses = [R3], Defs = [R19, R20], Size = 24 in -+def PseudoTAIL_LARGE : Pseudo<(outs), (ins bare_symbol:$dst)>; -+ -+let Predicates = [IsLA64] in { -+def : Pat<(loongarch_tail_large (iPTR tglobaladdr:$dst)), -+ (PseudoTAIL_LARGE tglobaladdr:$dst)>; -+def : Pat<(loongarch_tail_large (iPTR texternalsym:$dst)), -+ (PseudoTAIL_LARGE texternalsym:$dst)>; -+} // Predicates = [IsLA64] -+ - let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3] in - def PseudoTAILIndirect : Pseudo<(outs), (ins GPRT:$rj), - [(loongarch_tail GPRT:$rj)]>, - PseudoInstExpansion<(JIRL R0, GPR:$rj, 0)>; -+let Predicates = [IsLA64] in { -+def : Pat<(loongarch_tail_medium GPR:$rj), (PseudoTAILIndirect GPR:$rj)>; -+def : Pat<(loongarch_tail_large GPR:$rj), (PseudoTAILIndirect GPR:$rj)>; -+} - - let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, - hasSideEffects = 0, mayStore = 0, mayLoad = 0, Uses = [R3] in -@@ -1396,6 +1464,7 @@ def PseudoLA_ABS_LARGE : Pseudo<(outs GPR:$dst), - "la.abs", "$dst, $src">; - def PseudoLA_PCREL : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], - "la.pcrel", "$dst, $src">; -+let Defs = [R20], Size = 20 in - def PseudoLA_PCREL_LARGE : Pseudo<(outs GPR:$dst), - (ins GPR:$tmp, bare_symbol:$src), [], - "la.pcrel", "$dst, $tmp, $src">, -@@ -1407,28 +1476,30 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 0, - isAsmParserOnly = 1 in { - def PseudoLA_GOT : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], - "la.got", "$dst, $src">; -+def PseudoLA_TLS_IE : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], -+ "la.tls.ie", "$dst, $src">; -+def PseudoLA_TLS_LD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], -+ "la.tls.ld", "$dst, $src">; -+def PseudoLA_TLS_GD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], -+ "la.tls.gd", "$dst, $src">; -+let Defs = [R20], Size = 20 in { - def PseudoLA_GOT_LARGE : Pseudo<(outs GPR:$dst), - (ins GPR:$tmp, bare_symbol:$src), [], - "la.got", "$dst, $tmp, $src">, - Requires<[IsLA64]>; --def PseudoLA_TLS_IE : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], -- "la.tls.ie", "$dst, $src">; - def PseudoLA_TLS_IE_LARGE : Pseudo<(outs GPR:$dst), - (ins GPR:$tmp, bare_symbol:$src), [], - "la.tls.ie", "$dst, $tmp, $src">, - Requires<[IsLA64]>; --def PseudoLA_TLS_LD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], -- "la.tls.ld", "$dst, $src">; - def PseudoLA_TLS_LD_LARGE : Pseudo<(outs GPR:$dst), - (ins GPR:$tmp, bare_symbol:$src), [], - "la.tls.ld", "$dst, $tmp, $src">, - Requires<[IsLA64]>; --def PseudoLA_TLS_GD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], -- "la.tls.gd", "$dst, $src">; - def 
PseudoLA_TLS_GD_LARGE : Pseudo<(outs GPR:$dst), - (ins GPR:$tmp, bare_symbol:$src), [], - "la.tls.gd", "$dst, $tmp, $src">, - Requires<[IsLA64]>; -+} // Defs = [R20], Size = 20 - } - - // Load address inst alias: "la", "la.global" and "la.local". -diff --git a/llvm/test/CodeGen/LoongArch/code-models.ll b/llvm/test/CodeGen/LoongArch/code-models.ll -index 7c6f46d5e926..f93c31670928 100644 ---- a/llvm/test/CodeGen/LoongArch/code-models.ll -+++ b/llvm/test/CodeGen/LoongArch/code-models.ll -@@ -33,11 +33,11 @@ define i32 @call_globaladdress(i32 %a) nounwind { - ; LARGE: # %bb.0: - ; LARGE-NEXT: addi.d $sp, $sp, -16 - ; LARGE-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill --; LARGE-NEXT: pcalau12i $a1, %got_pc_hi20(callee) --; LARGE-NEXT: addi.d $ra, $zero, %got_pc_lo12(callee) --; LARGE-NEXT: lu32i.d $ra, %got64_pc_lo20(callee) --; LARGE-NEXT: lu52i.d $ra, $ra, %got64_pc_hi12(callee) --; LARGE-NEXT: ldx.d $ra, $ra, $a1 -+; LARGE-NEXT: pcalau12i $ra, %got_pc_hi20(callee) -+; LARGE-NEXT: addi.d $t8, $zero, %got_pc_lo12(callee) -+; LARGE-NEXT: lu32i.d $t8, %got64_pc_lo20(callee) -+; LARGE-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(callee) -+; LARGE-NEXT: ldx.d $ra, $t8, $ra - ; LARGE-NEXT: jirl $ra, $ra, 0 - ; LARGE-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload - ; LARGE-NEXT: addi.d $sp, $sp, 16 -@@ -82,11 +82,11 @@ define void @call_external_sym(ptr %dst) { - ; LARGE-NEXT: .cfi_offset 1, -8 - ; LARGE-NEXT: ori $a2, $zero, 1000 - ; LARGE-NEXT: move $a1, $zero --; LARGE-NEXT: pcalau12i $a3, %pc_hi20(memset) --; LARGE-NEXT: addi.d $ra, $zero, %pc_lo12(memset) --; LARGE-NEXT: lu32i.d $ra, %pc64_lo20(memset) --; LARGE-NEXT: lu52i.d $ra, $ra, %pc64_hi12(memset) --; LARGE-NEXT: add.d $ra, $ra, $a3 -+; LARGE-NEXT: pcalau12i $ra, %pc_hi20(memset) -+; LARGE-NEXT: addi.d $t8, $zero, %pc_lo12(memset) -+; LARGE-NEXT: lu32i.d $t8, %pc64_lo20(memset) -+; LARGE-NEXT: lu52i.d $t8, $t8, %pc64_hi12(memset) -+; LARGE-NEXT: add.d $ra, $t8, $ra - ; LARGE-NEXT: jirl $ra, $ra, 0 - ; LARGE-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload - ; LARGE-NEXT: addi.d $sp, $sp, 16 -@@ -105,17 +105,17 @@ define i32 @caller_tail(i32 %i) nounwind { - ; - ; MEDIUM-LABEL: caller_tail: - ; MEDIUM: # %bb.0: # %entry --; MEDIUM-NEXT: pcaddu18i $a1, %call36(callee_tail) --; MEDIUM-NEXT: jr $a1 -+; MEDIUM-NEXT: pcaddu18i $t8, %call36(callee_tail) -+; MEDIUM-NEXT: jr $t8 - ; - ; LARGE-LABEL: caller_tail: - ; LARGE: # %bb.0: # %entry --; LARGE-NEXT: pcalau12i $a1, %got_pc_hi20(callee_tail) --; LARGE-NEXT: addi.d $a2, $zero, %got_pc_lo12(callee_tail) --; LARGE-NEXT: lu32i.d $a2, %got64_pc_lo20(callee_tail) --; LARGE-NEXT: lu52i.d $a2, $a2, %got64_pc_hi12(callee_tail) --; LARGE-NEXT: ldx.d $a1, $a2, $a1 --; LARGE-NEXT: jr $a1 -+; LARGE-NEXT: pcalau12i $t7, %got_pc_hi20(callee_tail) -+; LARGE-NEXT: addi.d $t8, $zero, %got_pc_lo12(callee_tail) -+; LARGE-NEXT: lu32i.d $t8, %got64_pc_lo20(callee_tail) -+; LARGE-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(callee_tail) -+; LARGE-NEXT: ldx.d $t7, $t8, $t7 -+; LARGE-NEXT: jr $t7 - entry: - %r = tail call i32 @callee_tail(i32 %i) - ret i32 %r -diff --git a/llvm/test/CodeGen/LoongArch/expand-call.ll b/llvm/test/CodeGen/LoongArch/expand-call.ll -index 86bf4292665b..e0d179f92de6 100644 ---- a/llvm/test/CodeGen/LoongArch/expand-call.ll -+++ b/llvm/test/CodeGen/LoongArch/expand-call.ll -@@ -1,6 +1,6 @@ - ; RUN: llc --mtriple=loongarch64 --stop-before loongarch-prera-expand-pseudo \ - ; RUN: --verify-machineinstrs < %s | FileCheck %s --check-prefix=NOEXPAND --; RUN: llc --mtriple=loongarch64 --stop-after 
loongarch-prera-expand-pseudo \ -+; RUN: llc --mtriple=loongarch64 --stop-before machine-opt-remark-emitter \ - ; RUN: --verify-machineinstrs < %s | FileCheck %s --check-prefix=EXPAND - - declare void @callee() -diff --git a/llvm/test/CodeGen/LoongArch/global-address.ll b/llvm/test/CodeGen/LoongArch/global-address.ll -index a8f0ef648aa7..d32a17f488b1 100644 ---- a/llvm/test/CodeGen/LoongArch/global-address.ll -+++ b/llvm/test/CodeGen/LoongArch/global-address.ll -@@ -53,32 +53,32 @@ define void @foo() nounwind { - ; LA64LARGENOPIC-LABEL: foo: - ; LA64LARGENOPIC: # %bb.0: - ; LA64LARGENOPIC-NEXT: pcalau12i $a0, %got_pc_hi20(G) --; LA64LARGENOPIC-NEXT: addi.d $a1, $zero, %got_pc_lo12(G) --; LA64LARGENOPIC-NEXT: lu32i.d $a1, %got64_pc_lo20(G) --; LA64LARGENOPIC-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(G) --; LA64LARGENOPIC-NEXT: ldx.d $a0, $a1, $a0 -+; LA64LARGENOPIC-NEXT: addi.d $t8, $zero, %got_pc_lo12(G) -+; LA64LARGENOPIC-NEXT: lu32i.d $t8, %got64_pc_lo20(G) -+; LA64LARGENOPIC-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(G) -+; LA64LARGENOPIC-NEXT: ldx.d $a0, $t8, $a0 - ; LA64LARGENOPIC-NEXT: ld.w $a0, $a0, 0 - ; LA64LARGENOPIC-NEXT: pcalau12i $a0, %pc_hi20(g) --; LA64LARGENOPIC-NEXT: addi.d $a1, $zero, %pc_lo12(g) --; LA64LARGENOPIC-NEXT: lu32i.d $a1, %pc64_lo20(g) --; LA64LARGENOPIC-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g) --; LA64LARGENOPIC-NEXT: add.d $a0, $a1, $a0 -+; LA64LARGENOPIC-NEXT: addi.d $t8, $zero, %pc_lo12(g) -+; LA64LARGENOPIC-NEXT: lu32i.d $t8, %pc64_lo20(g) -+; LA64LARGENOPIC-NEXT: lu52i.d $t8, $t8, %pc64_hi12(g) -+; LA64LARGENOPIC-NEXT: add.d $a0, $t8, $a0 - ; LA64LARGENOPIC-NEXT: ld.w $a0, $a0, 0 - ; LA64LARGENOPIC-NEXT: ret - ; - ; LA64LARGEPIC-LABEL: foo: - ; LA64LARGEPIC: # %bb.0: - ; LA64LARGEPIC-NEXT: pcalau12i $a0, %got_pc_hi20(G) --; LA64LARGEPIC-NEXT: addi.d $a1, $zero, %got_pc_lo12(G) --; LA64LARGEPIC-NEXT: lu32i.d $a1, %got64_pc_lo20(G) --; LA64LARGEPIC-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(G) --; LA64LARGEPIC-NEXT: ldx.d $a0, $a1, $a0 -+; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %got_pc_lo12(G) -+; LA64LARGEPIC-NEXT: lu32i.d $t8, %got64_pc_lo20(G) -+; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(G) -+; LA64LARGEPIC-NEXT: ldx.d $a0, $t8, $a0 - ; LA64LARGEPIC-NEXT: ld.w $a0, $a0, 0 - ; LA64LARGEPIC-NEXT: pcalau12i $a0, %pc_hi20(.Lg$local) --; LA64LARGEPIC-NEXT: addi.d $a1, $zero, %pc_lo12(.Lg$local) --; LA64LARGEPIC-NEXT: lu32i.d $a1, %pc64_lo20(.Lg$local) --; LA64LARGEPIC-NEXT: lu52i.d $a1, $a1, %pc64_hi12(.Lg$local) --; LA64LARGEPIC-NEXT: add.d $a0, $a1, $a0 -+; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %pc_lo12(.Lg$local) -+; LA64LARGEPIC-NEXT: lu32i.d $t8, %pc64_lo20(.Lg$local) -+; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %pc64_hi12(.Lg$local) -+; LA64LARGEPIC-NEXT: add.d $a0, $t8, $a0 - ; LA64LARGEPIC-NEXT: ld.w $a0, $a0, 0 - ; LA64LARGEPIC-NEXT: ret - %V = load volatile i32, ptr @G -diff --git a/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll b/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll -index a515939b9c2b..474436a0126b 100644 ---- a/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll -+++ b/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll -@@ -48,13 +48,13 @@ define void @foo() nounwind { - ; MEDIUM_SCH-NEXT: addi.d $sp, $sp, -16 - ; MEDIUM_SCH-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill - ; MEDIUM_SCH-NEXT: pcalau12i $a0, %got_pc_hi20(G) --; MEDIUM_SCH-NEXT: pcaddu18i $ra, %call36(bar) - ; MEDIUM_SCH-NEXT: ld.d $a0, $a0, %got_pc_lo12(G) - ; MEDIUM_SCH-NEXT: ld.d $a0, $a0, 0 - ; MEDIUM_SCH-NEXT: pcalau12i $a0, %pc_hi20(g) - 
; MEDIUM_SCH-NEXT: addi.d $a0, $a0, %pc_lo12(g) - ; MEDIUM_SCH-NEXT: ld.d $a0, $a0, 0 - ; MEDIUM_SCH-NEXT: ori $a0, $zero, 1 -+; MEDIUM_SCH-NEXT: pcaddu18i $ra, %call36(bar) - ; MEDIUM_SCH-NEXT: jirl $ra, $ra, 0 - ; MEDIUM_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(gd) - ; MEDIUM_SCH-NEXT: ld.d $a0, $a0, %ie_pc_lo12(gd) -@@ -74,41 +74,41 @@ define void @foo() nounwind { - ; LARGE_NO_SCH-NEXT: addi.d $sp, $sp, -16 - ; LARGE_NO_SCH-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill - ; LARGE_NO_SCH-NEXT: pcalau12i $a0, %got_pc_hi20(G) --; LARGE_NO_SCH-NEXT: addi.d $a1, $zero, %got_pc_lo12(G) --; LARGE_NO_SCH-NEXT: lu32i.d $a1, %got64_pc_lo20(G) --; LARGE_NO_SCH-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(G) --; LARGE_NO_SCH-NEXT: ldx.d $a0, $a1, $a0 -+; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %got_pc_lo12(G) -+; LARGE_NO_SCH-NEXT: lu32i.d $t8, %got64_pc_lo20(G) -+; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(G) -+; LARGE_NO_SCH-NEXT: ldx.d $a0, $t8, $a0 - ; LARGE_NO_SCH-NEXT: ld.d $a0, $a0, 0 - ; LARGE_NO_SCH-NEXT: pcalau12i $a0, %pc_hi20(g) --; LARGE_NO_SCH-NEXT: addi.d $a1, $zero, %pc_lo12(g) --; LARGE_NO_SCH-NEXT: lu32i.d $a1, %pc64_lo20(g) --; LARGE_NO_SCH-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g) --; LARGE_NO_SCH-NEXT: add.d $a0, $a1, $a0 -+; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %pc_lo12(g) -+; LARGE_NO_SCH-NEXT: lu32i.d $t8, %pc64_lo20(g) -+; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %pc64_hi12(g) -+; LARGE_NO_SCH-NEXT: add.d $a0, $t8, $a0 - ; LARGE_NO_SCH-NEXT: ld.d $a0, $a0, 0 - ; LARGE_NO_SCH-NEXT: ori $a0, $zero, 1 --; LARGE_NO_SCH-NEXT: pcalau12i $a1, %got_pc_hi20(bar) --; LARGE_NO_SCH-NEXT: addi.d $ra, $zero, %got_pc_lo12(bar) --; LARGE_NO_SCH-NEXT: lu32i.d $ra, %got64_pc_lo20(bar) --; LARGE_NO_SCH-NEXT: lu52i.d $ra, $ra, %got64_pc_hi12(bar) --; LARGE_NO_SCH-NEXT: ldx.d $ra, $ra, $a1 -+; LARGE_NO_SCH-NEXT: pcalau12i $ra, %got_pc_hi20(bar) -+; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %got_pc_lo12(bar) -+; LARGE_NO_SCH-NEXT: lu32i.d $t8, %got64_pc_lo20(bar) -+; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(bar) -+; LARGE_NO_SCH-NEXT: ldx.d $ra, $t8, $ra - ; LARGE_NO_SCH-NEXT: jirl $ra, $ra, 0 - ; LARGE_NO_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(gd) --; LARGE_NO_SCH-NEXT: addi.d $a1, $zero, %ie_pc_lo12(gd) --; LARGE_NO_SCH-NEXT: lu32i.d $a1, %ie64_pc_lo20(gd) --; LARGE_NO_SCH-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(gd) --; LARGE_NO_SCH-NEXT: ldx.d $a0, $a1, $a0 -+; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %ie_pc_lo12(gd) -+; LARGE_NO_SCH-NEXT: lu32i.d $t8, %ie64_pc_lo20(gd) -+; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(gd) -+; LARGE_NO_SCH-NEXT: ldx.d $a0, $t8, $a0 - ; LARGE_NO_SCH-NEXT: ldx.d $a0, $a0, $tp - ; LARGE_NO_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ld) --; LARGE_NO_SCH-NEXT: addi.d $a1, $zero, %ie_pc_lo12(ld) --; LARGE_NO_SCH-NEXT: lu32i.d $a1, %ie64_pc_lo20(ld) --; LARGE_NO_SCH-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(ld) --; LARGE_NO_SCH-NEXT: ldx.d $a0, $a1, $a0 -+; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ld) -+; LARGE_NO_SCH-NEXT: lu32i.d $t8, %ie64_pc_lo20(ld) -+; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(ld) -+; LARGE_NO_SCH-NEXT: ldx.d $a0, $t8, $a0 - ; LARGE_NO_SCH-NEXT: ldx.d $a0, $a0, $tp - ; LARGE_NO_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ie) --; LARGE_NO_SCH-NEXT: addi.d $a1, $zero, %ie_pc_lo12(ie) --; LARGE_NO_SCH-NEXT: lu32i.d $a1, %ie64_pc_lo20(ie) --; LARGE_NO_SCH-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(ie) --; LARGE_NO_SCH-NEXT: ldx.d $a0, $a1, $a0 -+; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ie) -+; LARGE_NO_SCH-NEXT: lu32i.d $t8, %ie64_pc_lo20(ie) 
-+; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(ie) -+; LARGE_NO_SCH-NEXT: ldx.d $a0, $t8, $a0 - ; LARGE_NO_SCH-NEXT: ldx.d $a0, $a0, $tp - ; LARGE_NO_SCH-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload - ; LARGE_NO_SCH-NEXT: addi.d $sp, $sp, 16 -@@ -118,42 +118,42 @@ define void @foo() nounwind { - ; LARGE_SCH: # %bb.0: - ; LARGE_SCH-NEXT: addi.d $sp, $sp, -16 - ; LARGE_SCH-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill --; LARGE_SCH-NEXT: addi.d $a1, $zero, %got_pc_lo12(G) - ; LARGE_SCH-NEXT: pcalau12i $a0, %got_pc_hi20(G) --; LARGE_SCH-NEXT: addi.d $ra, $zero, %got_pc_lo12(bar) --; LARGE_SCH-NEXT: lu32i.d $a1, %got64_pc_lo20(G) --; LARGE_SCH-NEXT: lu32i.d $ra, %got64_pc_lo20(bar) --; LARGE_SCH-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(G) --; LARGE_SCH-NEXT: lu52i.d $ra, $ra, %got64_pc_hi12(bar) --; LARGE_SCH-NEXT: ldx.d $a0, $a1, $a0 --; LARGE_SCH-NEXT: addi.d $a1, $zero, %pc_lo12(g) --; LARGE_SCH-NEXT: lu32i.d $a1, %pc64_lo20(g) --; LARGE_SCH-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g) -+; LARGE_SCH-NEXT: addi.d $t8, $zero, %got_pc_lo12(G) -+; LARGE_SCH-NEXT: lu32i.d $t8, %got64_pc_lo20(G) -+; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(G) -+; LARGE_SCH-NEXT: ldx.d $a0, $t8, $a0 - ; LARGE_SCH-NEXT: ld.d $a0, $a0, 0 - ; LARGE_SCH-NEXT: pcalau12i $a0, %pc_hi20(g) --; LARGE_SCH-NEXT: add.d $a0, $a1, $a0 --; LARGE_SCH-NEXT: pcalau12i $a1, %got_pc_hi20(bar) -+; LARGE_SCH-NEXT: addi.d $t8, $zero, %pc_lo12(g) -+; LARGE_SCH-NEXT: lu32i.d $t8, %pc64_lo20(g) -+; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %pc64_hi12(g) -+; LARGE_SCH-NEXT: add.d $a0, $t8, $a0 - ; LARGE_SCH-NEXT: ld.d $a0, $a0, 0 --; LARGE_SCH-NEXT: ldx.d $ra, $ra, $a1 - ; LARGE_SCH-NEXT: ori $a0, $zero, 1 -+; LARGE_SCH-NEXT: pcalau12i $ra, %got_pc_hi20(bar) -+; LARGE_SCH-NEXT: addi.d $t8, $zero, %got_pc_lo12(bar) -+; LARGE_SCH-NEXT: lu32i.d $t8, %got64_pc_lo20(bar) -+; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(bar) -+; LARGE_SCH-NEXT: ldx.d $ra, $t8, $ra - ; LARGE_SCH-NEXT: jirl $ra, $ra, 0 --; LARGE_SCH-NEXT: addi.d $a1, $zero, %ie_pc_lo12(gd) - ; LARGE_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(gd) --; LARGE_SCH-NEXT: lu32i.d $a1, %ie64_pc_lo20(gd) --; LARGE_SCH-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(gd) --; LARGE_SCH-NEXT: ldx.d $a0, $a1, $a0 --; LARGE_SCH-NEXT: addi.d $a1, $zero, %ie_pc_lo12(ld) --; LARGE_SCH-NEXT: lu32i.d $a1, %ie64_pc_lo20(ld) --; LARGE_SCH-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(ld) -+; LARGE_SCH-NEXT: addi.d $t8, $zero, %ie_pc_lo12(gd) -+; LARGE_SCH-NEXT: lu32i.d $t8, %ie64_pc_lo20(gd) -+; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(gd) -+; LARGE_SCH-NEXT: ldx.d $a0, $t8, $a0 - ; LARGE_SCH-NEXT: ldx.d $a0, $a0, $tp - ; LARGE_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ld) --; LARGE_SCH-NEXT: ldx.d $a0, $a1, $a0 --; LARGE_SCH-NEXT: addi.d $a1, $zero, %ie_pc_lo12(ie) --; LARGE_SCH-NEXT: lu32i.d $a1, %ie64_pc_lo20(ie) --; LARGE_SCH-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(ie) -+; LARGE_SCH-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ld) -+; LARGE_SCH-NEXT: lu32i.d $t8, %ie64_pc_lo20(ld) -+; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(ld) -+; LARGE_SCH-NEXT: ldx.d $a0, $t8, $a0 - ; LARGE_SCH-NEXT: ldx.d $a0, $a0, $tp - ; LARGE_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ie) --; LARGE_SCH-NEXT: ldx.d $a0, $a1, $a0 -+; LARGE_SCH-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ie) -+; LARGE_SCH-NEXT: lu32i.d $t8, %ie64_pc_lo20(ie) -+; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(ie) -+; LARGE_SCH-NEXT: ldx.d $a0, $t8, $a0 - ; LARGE_SCH-NEXT: ldx.d $a0, $a0, $tp - ; LARGE_SCH-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload - ; LARGE_SCH-NEXT: addi.d 
$sp, $sp, 16 -diff --git a/llvm/test/CodeGen/LoongArch/tls-models.ll b/llvm/test/CodeGen/LoongArch/tls-models.ll -index a2a3792a6a54..3994df1da716 100644 ---- a/llvm/test/CodeGen/LoongArch/tls-models.ll -+++ b/llvm/test/CodeGen/LoongArch/tls-models.ll -@@ -45,15 +45,15 @@ define ptr @f1() nounwind { - ; LA64LARGEPIC-NEXT: addi.d $sp, $sp, -16 - ; LA64LARGEPIC-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill - ; LA64LARGEPIC-NEXT: pcalau12i $a0, %gd_pc_hi20(unspecified) --; LA64LARGEPIC-NEXT: addi.d $a1, $zero, %got_pc_lo12(unspecified) --; LA64LARGEPIC-NEXT: lu32i.d $a1, %got64_pc_lo20(unspecified) --; LA64LARGEPIC-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(unspecified) --; LA64LARGEPIC-NEXT: add.d $a0, $a1, $a0 --; LA64LARGEPIC-NEXT: pcalau12i $a1, %pc_hi20(__tls_get_addr) --; LA64LARGEPIC-NEXT: addi.d $ra, $zero, %pc_lo12(__tls_get_addr) --; LA64LARGEPIC-NEXT: lu32i.d $ra, %pc64_lo20(__tls_get_addr) --; LA64LARGEPIC-NEXT: lu52i.d $ra, $ra, %pc64_hi12(__tls_get_addr) --; LA64LARGEPIC-NEXT: add.d $ra, $ra, $a1 -+; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %got_pc_lo12(unspecified) -+; LA64LARGEPIC-NEXT: lu32i.d $t8, %got64_pc_lo20(unspecified) -+; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(unspecified) -+; LA64LARGEPIC-NEXT: add.d $a0, $t8, $a0 -+; LA64LARGEPIC-NEXT: pcalau12i $ra, %pc_hi20(__tls_get_addr) -+; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %pc_lo12(__tls_get_addr) -+; LA64LARGEPIC-NEXT: lu32i.d $t8, %pc64_lo20(__tls_get_addr) -+; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr) -+; LA64LARGEPIC-NEXT: add.d $ra, $t8, $ra - ; LA64LARGEPIC-NEXT: jirl $ra, $ra, 0 - ; LA64LARGEPIC-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload - ; LA64LARGEPIC-NEXT: addi.d $sp, $sp, 16 -@@ -76,10 +76,10 @@ define ptr @f1() nounwind { - ; LA64LARGENOPIC-LABEL: f1: - ; LA64LARGENOPIC: # %bb.0: # %entry - ; LA64LARGENOPIC-NEXT: pcalau12i $a0, %ie_pc_hi20(unspecified) --; LA64LARGENOPIC-NEXT: addi.d $a1, $zero, %ie_pc_lo12(unspecified) --; LA64LARGENOPIC-NEXT: lu32i.d $a1, %ie64_pc_lo20(unspecified) --; LA64LARGENOPIC-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(unspecified) --; LA64LARGENOPIC-NEXT: ldx.d $a0, $a1, $a0 -+; LA64LARGENOPIC-NEXT: addi.d $t8, $zero, %ie_pc_lo12(unspecified) -+; LA64LARGENOPIC-NEXT: lu32i.d $t8, %ie64_pc_lo20(unspecified) -+; LA64LARGENOPIC-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(unspecified) -+; LA64LARGENOPIC-NEXT: ldx.d $a0, $t8, $a0 - ; LA64LARGENOPIC-NEXT: add.d $a0, $a0, $tp - ; LA64LARGENOPIC-NEXT: ret - entry: -@@ -116,15 +116,15 @@ define ptr @f2() nounwind { - ; LA64LARGEPIC-NEXT: addi.d $sp, $sp, -16 - ; LA64LARGEPIC-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill - ; LA64LARGEPIC-NEXT: pcalau12i $a0, %ld_pc_hi20(ld) --; LA64LARGEPIC-NEXT: addi.d $a1, $zero, %got_pc_lo12(ld) --; LA64LARGEPIC-NEXT: lu32i.d $a1, %got64_pc_lo20(ld) --; LA64LARGEPIC-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(ld) --; LA64LARGEPIC-NEXT: add.d $a0, $a1, $a0 --; LA64LARGEPIC-NEXT: pcalau12i $a1, %pc_hi20(__tls_get_addr) --; LA64LARGEPIC-NEXT: addi.d $ra, $zero, %pc_lo12(__tls_get_addr) --; LA64LARGEPIC-NEXT: lu32i.d $ra, %pc64_lo20(__tls_get_addr) --; LA64LARGEPIC-NEXT: lu52i.d $ra, $ra, %pc64_hi12(__tls_get_addr) --; LA64LARGEPIC-NEXT: add.d $ra, $ra, $a1 -+; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %got_pc_lo12(ld) -+; LA64LARGEPIC-NEXT: lu32i.d $t8, %got64_pc_lo20(ld) -+; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(ld) -+; LA64LARGEPIC-NEXT: add.d $a0, $t8, $a0 -+; LA64LARGEPIC-NEXT: pcalau12i $ra, %pc_hi20(__tls_get_addr) -+; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %pc_lo12(__tls_get_addr) 
-+; LA64LARGEPIC-NEXT: lu32i.d $t8, %pc64_lo20(__tls_get_addr) -+; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr) -+; LA64LARGEPIC-NEXT: add.d $ra, $t8, $ra - ; LA64LARGEPIC-NEXT: jirl $ra, $ra, 0 - ; LA64LARGEPIC-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload - ; LA64LARGEPIC-NEXT: addi.d $sp, $sp, 16 -@@ -147,10 +147,10 @@ define ptr @f2() nounwind { - ; LA64LARGENOPIC-LABEL: f2: - ; LA64LARGENOPIC: # %bb.0: # %entry - ; LA64LARGENOPIC-NEXT: pcalau12i $a0, %ie_pc_hi20(ld) --; LA64LARGENOPIC-NEXT: addi.d $a1, $zero, %ie_pc_lo12(ld) --; LA64LARGENOPIC-NEXT: lu32i.d $a1, %ie64_pc_lo20(ld) --; LA64LARGENOPIC-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(ld) --; LA64LARGENOPIC-NEXT: ldx.d $a0, $a1, $a0 -+; LA64LARGENOPIC-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ld) -+; LA64LARGENOPIC-NEXT: lu32i.d $t8, %ie64_pc_lo20(ld) -+; LA64LARGENOPIC-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(ld) -+; LA64LARGENOPIC-NEXT: ldx.d $a0, $t8, $a0 - ; LA64LARGENOPIC-NEXT: add.d $a0, $a0, $tp - ; LA64LARGENOPIC-NEXT: ret - entry: -@@ -177,10 +177,10 @@ define ptr @f3() nounwind { - ; LA64LARGEPIC-LABEL: f3: - ; LA64LARGEPIC: # %bb.0: # %entry - ; LA64LARGEPIC-NEXT: pcalau12i $a0, %ie_pc_hi20(ie) --; LA64LARGEPIC-NEXT: addi.d $a1, $zero, %ie_pc_lo12(ie) --; LA64LARGEPIC-NEXT: lu32i.d $a1, %ie64_pc_lo20(ie) --; LA64LARGEPIC-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(ie) --; LA64LARGEPIC-NEXT: ldx.d $a0, $a1, $a0 -+; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ie) -+; LA64LARGEPIC-NEXT: lu32i.d $t8, %ie64_pc_lo20(ie) -+; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(ie) -+; LA64LARGEPIC-NEXT: ldx.d $a0, $t8, $a0 - ; LA64LARGEPIC-NEXT: add.d $a0, $a0, $tp - ; LA64LARGEPIC-NEXT: ret - ; -@@ -201,10 +201,10 @@ define ptr @f3() nounwind { - ; LA64LARGENOPIC-LABEL: f3: - ; LA64LARGENOPIC: # %bb.0: # %entry - ; LA64LARGENOPIC-NEXT: pcalau12i $a0, %ie_pc_hi20(ie) --; LA64LARGENOPIC-NEXT: addi.d $a1, $zero, %ie_pc_lo12(ie) --; LA64LARGENOPIC-NEXT: lu32i.d $a1, %ie64_pc_lo20(ie) --; LA64LARGENOPIC-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(ie) --; LA64LARGENOPIC-NEXT: ldx.d $a0, $a1, $a0 -+; LA64LARGENOPIC-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ie) -+; LA64LARGENOPIC-NEXT: lu32i.d $t8, %ie64_pc_lo20(ie) -+; LA64LARGENOPIC-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(ie) -+; LA64LARGENOPIC-NEXT: ldx.d $a0, $t8, $a0 - ; LA64LARGENOPIC-NEXT: add.d $a0, $a0, $tp - ; LA64LARGENOPIC-NEXT: ret - entry: --- -2.20.1 - - -From 34e8c30579faf4a8ef69fa686bd9b2d9e832d299 Mon Sep 17 00:00:00 2001 -From: Jie Fu -Date: Fri, 5 Jan 2024 12:05:23 +0800 -Subject: [PATCH 06/12] [LoongArch] Fix -Wunused-variable in - LoongArchExpandPseudoInsts.cpp (NFC) - -llvm-project/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp:480:20: - error: unused variable 'MF' [-Werror,-Wunused-variable] - MachineFunction *MF = MBB.getParent(); - ^ -1 error generated. 
- -(cherry picked from commit 52d1397e38ee88b170585c9c824d08e6975890ca) ---- - llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp -index f977f176066a..ad39658f698e 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp -@@ -477,12 +477,11 @@ bool LoongArchExpandPseudo::expandLargeAddressLoad( - break; - } - -- MachineFunction *MF = MBB.getParent(); - MachineInstr &MI = *MBBI; - DebugLoc DL = MI.getDebugLoc(); - Register ScratchReg = LoongArch::R20; // $t8 - -- assert(MF->getSubtarget().is64Bit() && -+ assert(MBB.getParent()->getSubtarget().is64Bit() && - "Large code model requires LA64"); - - auto Part1 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), DestReg); --- -2.20.1 - - -From b5d3aa3ac0dcf98fbb5f8d2d9de295be991c9e8f Mon Sep 17 00:00:00 2001 +From b9310080fd24bd856727f7e22dc811a278b400b5 Mon Sep 17 00:00:00 2001 From: Zhaoxin Yang Date: Tue, 23 Jul 2024 12:06:59 +0800 -Subject: [PATCH 07/12] [LoongArch][CodeGen] Implement 128-bit and 256-bit +Subject: [PATCH 16/23] [LoongArch][CodeGen] Implement 128-bit and 256-bit vector shuffle. (#100054) [LoongArch][CodeGen] Implement 128-bit and 256-bit vector shuffle @@ -2068,6 +20,7 @@ vectors and processing single element patterns, etc. (cherry picked from commit 464ea880cf7710cc8675c83001d7ae020406cf42) --- + .../lib/Driver/ToolChains/Arch/LoongArch.cpp | 4 +- .../LoongArch/LoongArchISelLowering.cpp | 933 +++++++++++++++++- .../Target/LoongArch/LoongArchISelLowering.h | 10 + .../LoongArch/LoongArchLASXInstrInfo.td | 130 +++ @@ -2084,7 +37,7 @@ vectors and processing single element patterns, etc. .../lsx/ir-instruction/shuffle-as-vreplvei.ll | 62 ++ .../lsx/ir-instruction/shuffle-as-vshuf.ll | 84 ++ .../lsx/ir-instruction/shuffle-as-vshuf4i.ll | 42 + - 16 files changed, 2158 insertions(+), 3 deletions(-) + 17 files changed, 2160 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvilv.ll create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpack.ll create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpick.ll @@ -2098,6 +51,28 @@ vectors and processing single element patterns, etc. create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll +diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp +index 2d9c3f810a06..8b3d2837a4e5 100644 +--- a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp ++++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp +@@ -216,7 +216,7 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D, + // Option -msimd=lsx depends on 64-bit FPU. + // -m*-float and -mfpu=none/0/32 conflict with -mlsx. + if (llvm::find(Features, "-d") != Features.end()) +- D.Diag(diag::err_drv_loongarch_wrong_fpu_width) << /*LSX*/ 0; ++ D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lsx); + // The previous option does not contain feature -lsx. + else if (llvm::find(Features, "-lsx") == Features.end()) + Features.push_back("+lsx"); +@@ -224,7 +224,7 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D, + // Option -msimd=lasx depends on 64-bit FPU and LSX. 
+ // -m*-float and -mfpu=none/0/32 conflict with -mlsx. + if (llvm::find(Features, "-d") != Features.end()) +- D.Diag(diag::err_drv_loongarch_wrong_fpu_width) << /*LASX*/ 1; ++ D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lasx); + else if (llvm::find(Features, "-lsx") != Features.end()) + D.Diag(diag::err_drv_loongarch_invalid_simd_option_combination); + // The previous option does not contain feature -lasx. diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index df1b17649b7d..618ae7056425 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -4426,1038 +2401,3 @@ index 000000000000..660b9581c3d1 -- 2.20.1 - -From cac0cc4649362e0b80f61e45aec54341f40f7f77 Mon Sep 17 00:00:00 2001 -From: Ami-zhang -Date: Wed, 17 Jan 2024 11:15:05 +0800 -Subject: [PATCH 08/12] [LoongArch] Add LoongArch V1.1 instructions definitions - and MC tests (#78238) - -LoongArch V1.1 instrucions include floating-point approximate reciprocal -instructions and atomic instrucions. And add testcases for these -instrucions meanwhile. - -(cherry picked from commit 84bdee2875da364be7eb2144b1ae530f6a05f0e2) ---- - .../LoongArch/LoongArchFloat32InstrInfo.td | 2 + - .../LoongArch/LoongArchFloat64InstrInfo.td | 2 + - .../Target/LoongArch/LoongArchInstrInfo.td | 34 ++++++- - .../LoongArch/LoongArchLASXInstrInfo.td | 4 + - .../Target/LoongArch/LoongArchLSXInstrInfo.td | 4 + - llvm/test/MC/LoongArch/Basic/Float/d-arith.s | 8 ++ - llvm/test/MC/LoongArch/Basic/Float/f-arith.s | 8 ++ - llvm/test/MC/LoongArch/Basic/Integer/atomic.s | 92 +++++++++++++++++++ - llvm/test/MC/LoongArch/lasx/frecip.s | 8 ++ - llvm/test/MC/LoongArch/lasx/frsqrt.s | 8 ++ - llvm/test/MC/LoongArch/lsx/frecip.s | 8 ++ - llvm/test/MC/LoongArch/lsx/frsqrt.s | 8 ++ - 12 files changed, 184 insertions(+), 2 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td -index 65120c083f49..f30837912e75 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td -@@ -50,6 +50,8 @@ def FNEG_S : FP_ALU_2R<0x01141400>; - def FSQRT_S : FP_ALU_2R<0x01144400>; - def FRECIP_S : FP_ALU_2R<0x01145400>; - def FRSQRT_S : FP_ALU_2R<0x01146400>; -+def FRECIPE_S : FP_ALU_2R<0x01147400>; -+def FRSQRTE_S : FP_ALU_2R<0x01148400>; - def FSCALEB_S : FP_ALU_3R<0x01108000>; - def FLOGB_S : FP_ALU_2R<0x01142400>; - def FCOPYSIGN_S : FP_ALU_3R<0x01128000>; -diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td -index 437c1e4d7be2..0ea4c564b045 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td -@@ -34,6 +34,8 @@ def FNEG_D : FP_ALU_2R<0x01141800, FPR64>; - def FSQRT_D : FP_ALU_2R<0x01144800, FPR64>; - def FRECIP_D : FP_ALU_2R<0x01145800, FPR64>; - def FRSQRT_D : FP_ALU_2R<0x01146800, FPR64>; -+def FRECIPE_D : FP_ALU_2R<0x01147800, FPR64>; -+def FRSQRTE_D : FP_ALU_2R<0x01148800, FPR64>; - def FSCALEB_D : FP_ALU_3R<0x01110000, FPR64>; - def FLOGB_D : FP_ALU_2R<0x01142800, FPR64>; - def FCOPYSIGN_D : FP_ALU_3R<0x01130000, FPR64>; -diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -index ecd0c2b71b85..756c460f916b 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -@@ -634,15 +634,24 @@ 
class AM_3R op> - : Fmt3R; - --let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in -+let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in { - class LLBase op> - : Fmt2RI14; -+class LLBase_ACQ op> -+ : Fmt2R; -+} - --let hasSideEffects = 0, mayLoad = 0, mayStore = 1, Constraints = "$rd = $dst" in -+let hasSideEffects = 0, mayLoad = 0, mayStore = 1, Constraints = "$rd = $dst" in { - class SCBase op> - : Fmt2RI14; -+class SCBase_128 op> -+ : Fmt3R; -+class SCBase_REL op> -+ : Fmt2R; -+} - - let hasSideEffects = 1 in - class IOCSRRD op> -@@ -754,6 +763,8 @@ def PRELD : FmtPRELD<(outs), (ins uimm5:$imm5, GPR:$rj, simm12:$imm12), - // Atomic Memory Access Instructions - def LL_W : LLBase<0x20000000>; - def SC_W : SCBase<0x21000000>; -+def LLACQ_W : LLBase_ACQ<0x38578000>; -+def SCREL_W : SCBase_REL<0x38578400>; - - // Barrier Instructions - def DBAR : MISC_I15<0x38720000>; -@@ -875,8 +886,12 @@ def STLE_W : STORE_3R<0x387f0000>; - def STLE_D : STORE_3R<0x387f8000>; - - // Atomic Memory Access Instructions for 64-bits -+def AMSWAP_B : AM_3R<0x385c0000>; -+def AMSWAP_H : AM_3R<0x385c8000>; - def AMSWAP_W : AM_3R<0x38600000>; - def AMSWAP_D : AM_3R<0x38608000>; -+def AMADD_B : AM_3R<0x385d0000>; -+def AMADD_H : AM_3R<0x385d8000>; - def AMADD_W : AM_3R<0x38610000>; - def AMADD_D : AM_3R<0x38618000>; - def AMAND_W : AM_3R<0x38620000>; -@@ -893,8 +908,12 @@ def AMMAX_WU : AM_3R<0x38670000>; - def AMMAX_DU : AM_3R<0x38678000>; - def AMMIN_WU : AM_3R<0x38680000>; - def AMMIN_DU : AM_3R<0x38688000>; -+def AMSWAP__DB_B : AM_3R<0x385e0000>; -+def AMSWAP__DB_H : AM_3R<0x385e8000>; - def AMSWAP__DB_W : AM_3R<0x38690000>; - def AMSWAP__DB_D : AM_3R<0x38698000>; -+def AMADD__DB_B : AM_3R<0x385f0000>; -+def AMADD__DB_H : AM_3R<0x385f8000>; - def AMADD__DB_W : AM_3R<0x386a0000>; - def AMADD__DB_D : AM_3R<0x386a8000>; - def AMAND__DB_W : AM_3R<0x386b0000>; -@@ -911,8 +930,19 @@ def AMMAX__DB_WU : AM_3R<0x38700000>; - def AMMAX__DB_DU : AM_3R<0x38708000>; - def AMMIN__DB_WU : AM_3R<0x38710000>; - def AMMIN__DB_DU : AM_3R<0x38718000>; -+def AMCAS_B : AM_3R<0x38580000>; -+def AMCAS_H : AM_3R<0x38588000>; -+def AMCAS_W : AM_3R<0x38590000>; -+def AMCAS_D : AM_3R<0x38598000>; -+def AMCAS__DB_B : AM_3R<0x385a0000>; -+def AMCAS__DB_H : AM_3R<0x385a8000>; -+def AMCAS__DB_W : AM_3R<0x385b0000>; -+def AMCAS__DB_D : AM_3R<0x385b8000>; - def LL_D : LLBase<0x22000000>; - def SC_D : SCBase<0x23000000>; -+def SC_Q : SCBase_128<0x38570000>; -+def LLACQ_D : LLBase_ACQ<0x38578800>; -+def SCREL_D : SCBase_REL<0x38578C00>; - - // CRC Check Instructions - def CRC_W_B_W : ALU_3R<0x00240000>; -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index 5b6721cdf1b4..454915ac8c0a 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -773,6 +773,10 @@ def XVFRECIP_S : LASX2R_XX<0x769cf400>; - def XVFRECIP_D : LASX2R_XX<0x769cf800>; - def XVFRSQRT_S : LASX2R_XX<0x769d0400>; - def XVFRSQRT_D : LASX2R_XX<0x769d0800>; -+def XVFRECIPE_S : LASX2R_XX<0x769d1400>; -+def XVFRECIPE_D : LASX2R_XX<0x769d1800>; -+def XVFRSQRTE_S : LASX2R_XX<0x769d2400>; -+def XVFRSQRTE_D : LASX2R_XX<0x769d2800>; - - def XVFCVTL_S_H : LASX2R_XX<0x769de800>; - def XVFCVTH_S_H : LASX2R_XX<0x769dec00>; -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index 3519fa3142c3..6d60d7074ec3 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ 
b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -918,6 +918,10 @@ def VFRECIP_S : LSX2R_VV<0x729cf400>; - def VFRECIP_D : LSX2R_VV<0x729cf800>; - def VFRSQRT_S : LSX2R_VV<0x729d0400>; - def VFRSQRT_D : LSX2R_VV<0x729d0800>; -+def VFRECIPE_S : LSX2R_VV<0x729d1400>; -+def VFRECIPE_D : LSX2R_VV<0x729d1800>; -+def VFRSQRTE_S : LSX2R_VV<0x729d2400>; -+def VFRSQRTE_D : LSX2R_VV<0x729d2800>; - - def VFCVTL_S_H : LSX2R_VV<0x729de800>; - def VFCVTH_S_H : LSX2R_VV<0x729dec00>; -diff --git a/llvm/test/MC/LoongArch/Basic/Float/d-arith.s b/llvm/test/MC/LoongArch/Basic/Float/d-arith.s -index 6b2c67e9a2cc..8e19d2e34f3c 100644 ---- a/llvm/test/MC/LoongArch/Basic/Float/d-arith.s -+++ b/llvm/test/MC/LoongArch/Basic/Float/d-arith.s -@@ -78,10 +78,18 @@ fsqrt.d $fa2, $ft3 - # ASM: encoding: [0x7b,0x5b,0x14,0x01] - frecip.d $fs3, $fs3 - -+# ASM-AND-OBJ: frecipe.d $fa0, $fa0 -+# ASM: encoding: [0x00,0x78,0x14,0x01] -+frecipe.d $fa0, $fa0 -+ - # ASM-AND-OBJ: frsqrt.d $ft14, $fa3 - # ASM: encoding: [0x76,0x68,0x14,0x01] - frsqrt.d $ft14, $fa3 - -+# ASM-AND-OBJ: frsqrte.d $fa1, $fa1 -+# ASM: encoding: [0x21,0x88,0x14,0x01] -+frsqrte.d $fa1, $fa1 -+ - # ASM-AND-OBJ: fscaleb.d $ft4, $ft6, $fs2 - # ASM: encoding: [0xcc,0x69,0x11,0x01] - fscaleb.d $ft4, $ft6, $fs2 -diff --git a/llvm/test/MC/LoongArch/Basic/Float/f-arith.s b/llvm/test/MC/LoongArch/Basic/Float/f-arith.s -index 155e783cf435..c32151adbf3b 100644 ---- a/llvm/test/MC/LoongArch/Basic/Float/f-arith.s -+++ b/llvm/test/MC/LoongArch/Basic/Float/f-arith.s -@@ -73,10 +73,18 @@ fsqrt.s $fs3, $ft10 - # ASM: encoding: [0x71,0x57,0x14,0x01] - frecip.s $ft9, $fs3 - -+# ASM-AND-OBJ: frecipe.s $fa0, $fa0 -+# ASM: encoding: [0x00,0x74,0x14,0x01] -+frecipe.s $fa0, $fa0 -+ - # ASM-AND-OBJ: frsqrt.s $fs1, $ft4 - # ASM: encoding: [0x99,0x65,0x14,0x01] - frsqrt.s $fs1, $ft4 - -+# ASM-AND-OBJ: frsqrte.s $fa1, $fa1 -+# ASM: encoding: [0x21,0x84,0x14,0x01] -+frsqrte.s $fa1, $fa1 -+ - # ASM-AND-OBJ: fscaleb.s $ft13, $ft15, $fa6 - # ASM: encoding: [0xf5,0x9a,0x10,0x01] - fscaleb.s $ft13, $ft15, $fa6 -diff --git a/llvm/test/MC/LoongArch/Basic/Integer/atomic.s b/llvm/test/MC/LoongArch/Basic/Integer/atomic.s -index a35211db8851..69acdeef935c 100644 ---- a/llvm/test/MC/LoongArch/Basic/Integer/atomic.s -+++ b/llvm/test/MC/LoongArch/Basic/Integer/atomic.s -@@ -21,6 +21,14 @@ ll.w $tp, $s4, 220 - # CHECK-ASM: encoding: [0xd3,0x39,0x00,0x21] - sc.w $t7, $t2, 56 - -+# CHECK-ASM-AND-OBJ: llacq.w $t1, $t2 -+# CHECK-ASM: encoding: [0xcd,0x81,0x57,0x38] -+llacq.w $t1, $t2 -+ -+# CHECK-ASM-AND-OBJ: screl.w $t1, $t2 -+# CHECK-ASM: encoding: [0xcd,0x85,0x57,0x38] -+screl.w $t1, $t2 -+ - - - ############################################################# -@@ -29,6 +37,14 @@ sc.w $t7, $t2, 56 - - .ifdef LA64 - -+# CHECK64-ASM-AND-OBJ: amswap.b $a2, $t0, $s1 -+# CHECK64-ASM: encoding: [0x06,0x33,0x5c,0x38] -+amswap.b $a2, $t0, $s1, 0 -+ -+# CHECK64-ASM-AND-OBJ: amswap.h $a2, $t0, $s1 -+# CHECK64-ASM: encoding: [0x06,0xb3,0x5c,0x38] -+amswap.h $a2, $t0, $s1, 0 -+ - # CHECK64-ASM-AND-OBJ: amswap.w $a2, $t0, $s1 - # CHECK64-ASM: encoding: [0x06,0x33,0x60,0x38] - amswap.w $a2, $t0, $s1, 0 -@@ -41,6 +57,14 @@ amswap.w $zero, $t0, $zero - # CHECK64-ASM: encoding: [0xa0,0x00,0x6a,0x38] - amadd_db.w $zero, $zero, $a1 - -+# CHECK64-ASM-AND-OBJ: amswap.b $a2, $t0, $s1 -+# CHECK64-ASM: encoding: [0x06,0x33,0x5c,0x38] -+amswap.b $a2, $t0, $s1 -+ -+# CHECK64-ASM-AND-OBJ: amswap.h $a2, $t0, $s1 -+# CHECK64-ASM: encoding: [0x06,0xb3,0x5c,0x38] -+amswap.h $a2, $t0, $s1 -+ - # CHECK64-ASM-AND-OBJ: amswap.w 
$a2, $t0, $s1 - # CHECK64-ASM: encoding: [0x06,0x33,0x60,0x38] - amswap.w $a2, $t0, $s1 -@@ -49,6 +73,14 @@ amswap.w $a2, $t0, $s1 - # CHECK64-ASM: encoding: [0xc2,0xba,0x60,0x38] - amswap.d $tp, $t2, $fp - -+# CHECK64-ASM-AND-OBJ: amadd.b $a4, $t0, $r21 -+# CHECK64-ASM: encoding: [0xa8,0x32,0x5d,0x38] -+amadd.b $a4, $t0, $r21 -+ -+# CHECK64-ASM-AND-OBJ: amadd.h $a1, $t5, $s6 -+# CHECK64-ASM: encoding: [0xa5,0xc7,0x5d,0x38] -+amadd.h $a1, $t5, $s6 -+ - # CHECK64-ASM-AND-OBJ: amadd.w $a4, $t0, $r21 - # CHECK64-ASM: encoding: [0xa8,0x32,0x61,0x38] - amadd.w $a4, $t0, $r21 -@@ -113,6 +145,14 @@ ammin.wu $a4, $t6, $s7 - # CHECK64-ASM: encoding: [0x27,0xc3,0x68,0x38] - ammin.du $a3, $t4, $s2 - -+# CHECK64-ASM-AND-OBJ: amswap_db.b $a2, $t0, $s1 -+# CHECK64-ASM: encoding: [0x06,0x33,0x5e,0x38] -+amswap_db.b $a2, $t0, $s1 -+ -+# CHECK64-ASM-AND-OBJ: amswap_db.h $tp, $t2, $fp -+# CHECK64-ASM: encoding: [0xc2,0xba,0x5e,0x38] -+amswap_db.h $tp, $t2, $fp -+ - # CHECK64-ASM-AND-OBJ: amswap_db.w $a2, $t0, $s1 - # CHECK64-ASM: encoding: [0x06,0x33,0x69,0x38] - amswap_db.w $a2, $t0, $s1 -@@ -121,6 +161,14 @@ amswap_db.w $a2, $t0, $s1 - # CHECK64-ASM: encoding: [0xc2,0xba,0x69,0x38] - amswap_db.d $tp, $t2, $fp - -+# CHECK64-ASM-AND-OBJ: amadd_db.b $zero, $zero, $a1 -+# CHECK64-ASM: encoding: [0xa0,0x00,0x5f,0x38] -+amadd_db.b $zero, $zero, $a1 -+ -+# CHECK64-ASM-AND-OBJ: amadd_db.h $a4, $t0, $r21 -+# CHECK64-ASM: encoding: [0xa8,0xb2,0x5f,0x38] -+amadd_db.h $a4, $t0, $r21 -+ - # CHECK64-ASM-AND-OBJ: amadd_db.w $a4, $t0, $r21 - # CHECK64-ASM: encoding: [0xa8,0x32,0x6a,0x38] - amadd_db.w $a4, $t0, $r21 -@@ -185,6 +233,38 @@ ammin_db.wu $a4, $t6, $s7 - # CHECK64-ASM: encoding: [0x27,0xc3,0x71,0x38] - ammin_db.du $a3, $t4, $s2 - -+# CHECK64-ASM-AND-OBJ: amcas.b $t1, $t2, $t3 -+# CHECK64-ASM: encoding: [0xed,0x39,0x58,0x38] -+amcas.b $t1, $t2, $t3 -+ -+# CHECK64-ASM-AND-OBJ: amcas.h $t1, $t2, $t3 -+# CHECK64-ASM: encoding: [0xed,0xb9,0x58,0x38] -+amcas.h $t1, $t2, $t3 -+ -+# CHECK64-ASM-AND-OBJ: amcas.w $t1, $t2, $t3 -+# CHECK64-ASM: encoding: [0xed,0x39,0x59,0x38] -+amcas.w $t1, $t2, $t3 -+ -+# CHECK64-ASM-AND-OBJ: amcas.d $t1, $t2, $t3 -+# CHECK64-ASM: encoding: [0xed,0xb9,0x59,0x38] -+amcas.d $t1, $t2, $t3 -+ -+# CHECK64-ASM-AND-OBJ: amcas_db.b $t1, $t2, $t3 -+# CHECK64-ASM: encoding: [0xed,0x39,0x5a,0x38] -+amcas_db.b $t1, $t2, $t3 -+ -+# CHECK64-ASM-AND-OBJ: amcas_db.h $t1, $t2, $t3 -+# CHECK64-ASM: encoding: [0xed,0xb9,0x5a,0x38] -+amcas_db.h $t1, $t2, $t3 -+ -+# CHECK64-ASM-AND-OBJ: amcas_db.w $t1, $t2, $t3 -+# CHECK64-ASM: encoding: [0xed,0x39,0x5b,0x38] -+amcas_db.w $t1, $t2, $t3 -+ -+# CHECK64-ASM-AND-OBJ: amcas_db.d $t1, $t2, $t3 -+# CHECK64-ASM: encoding: [0xed,0xb9,0x5b,0x38] -+amcas_db.d $t1, $t2, $t3 -+ - # CHECK64-ASM-AND-OBJ: ll.d $s2, $s4, 16 - # CHECK64-ASM: encoding: [0x79,0x13,0x00,0x22] - ll.d $s2, $s4, 16 -@@ -193,5 +273,17 @@ ll.d $s2, $s4, 16 - # CHECK64-ASM: encoding: [0x31,0xf6,0x00,0x23] - sc.d $t5, $t5, 244 - -+# CHECK64-ASM-AND-OBJ: sc.q $t7, $t2, $t5 -+# CHECK64-ASM: encoding: [0x33,0x3a,0x57,0x38] -+sc.q $t7, $t2, $t5 -+ -+# CHECK64-ASM-AND-OBJ: llacq.d $t1, $t2 -+# CHECK64-ASM: encoding: [0xcd,0x89,0x57,0x38] -+llacq.d $t1, $t2 -+ -+# CHECK64-ASM-AND-OBJ: screl.d $t1, $t2 -+# CHECK64-ASM: encoding: [0xcd,0x8d,0x57,0x38] -+screl.d $t1, $t2 -+ - .endif - -diff --git a/llvm/test/MC/LoongArch/lasx/frecip.s b/llvm/test/MC/LoongArch/lasx/frecip.s -index 1bb3ce02fb9c..e95b03a96eba 100644 ---- a/llvm/test/MC/LoongArch/lasx/frecip.s -+++ b/llvm/test/MC/LoongArch/lasx/frecip.s -@@ -10,3 
+10,11 @@ xvfrecip.s $xr3, $xr16 - xvfrecip.d $xr17, $xr24 - # CHECK-INST: xvfrecip.d $xr17, $xr24 - # CHECK-ENCODING: encoding: [0x11,0xfb,0x9c,0x76] -+ -+xvfrecipe.s $xr3, $xr16 -+# CHECK-INST: xvfrecipe.s $xr3, $xr16 -+# CHECK-ENCODING: encoding: [0x03,0x16,0x9d,0x76] -+ -+xvfrecipe.d $xr17, $xr24 -+# CHECK-INST: xvfrecipe.d $xr17, $xr24 -+# CHECK-ENCODING: encoding: [0x11,0x1b,0x9d,0x76] -diff --git a/llvm/test/MC/LoongArch/lasx/frsqrt.s b/llvm/test/MC/LoongArch/lasx/frsqrt.s -index af96e10832df..d1048f9ff8f0 100644 ---- a/llvm/test/MC/LoongArch/lasx/frsqrt.s -+++ b/llvm/test/MC/LoongArch/lasx/frsqrt.s -@@ -10,3 +10,11 @@ xvfrsqrt.s $xr31, $xr25 - xvfrsqrt.d $xr14, $xr22 - # CHECK-INST: xvfrsqrt.d $xr14, $xr22 - # CHECK-ENCODING: encoding: [0xce,0x0a,0x9d,0x76] -+ -+xvfrsqrte.s $xr31, $xr25 -+# CHECK-INST: xvfrsqrte.s $xr31, $xr25 -+# CHECK-ENCODING: encoding: [0x3f,0x27,0x9d,0x76] -+ -+xvfrsqrte.d $xr14, $xr22 -+# CHECK-INST: xvfrsqrte.d $xr14, $xr22 -+# CHECK-ENCODING: encoding: [0xce,0x2a,0x9d,0x76] -diff --git a/llvm/test/MC/LoongArch/lsx/frecip.s b/llvm/test/MC/LoongArch/lsx/frecip.s -index d8c8278d1667..cd6d925e1470 100644 ---- a/llvm/test/MC/LoongArch/lsx/frecip.s -+++ b/llvm/test/MC/LoongArch/lsx/frecip.s -@@ -10,3 +10,11 @@ vfrecip.s $vr29, $vr14 - vfrecip.d $vr24, $vr9 - # CHECK-INST: vfrecip.d $vr24, $vr9 - # CHECK-ENCODING: encoding: [0x38,0xf9,0x9c,0x72] -+ -+vfrecipe.s $vr29, $vr14 -+# CHECK-INST: vfrecipe.s $vr29, $vr14 -+# CHECK-ENCODING: encoding: [0xdd,0x15,0x9d,0x72] -+ -+vfrecipe.d $vr24, $vr9 -+# CHECK-INST: vfrecipe.d $vr24, $vr9 -+# CHECK-ENCODING: encoding: [0x38,0x19,0x9d,0x72] -diff --git a/llvm/test/MC/LoongArch/lsx/frsqrt.s b/llvm/test/MC/LoongArch/lsx/frsqrt.s -index 68b0cc091b8a..d8b9fc3d0684 100644 ---- a/llvm/test/MC/LoongArch/lsx/frsqrt.s -+++ b/llvm/test/MC/LoongArch/lsx/frsqrt.s -@@ -10,3 +10,11 @@ vfrsqrt.s $vr19, $vr30 - vfrsqrt.d $vr1, $vr0 - # CHECK-INST: vfrsqrt.d $vr1, $vr0 - # CHECK-ENCODING: encoding: [0x01,0x08,0x9d,0x72] -+ -+vfrsqrte.s $vr19, $vr30 -+# CHECK-INST: vfrsqrte.s $vr19, $vr30 -+# CHECK-ENCODING: encoding: [0xd3,0x27,0x9d,0x72] -+ -+vfrsqrte.d $vr1, $vr0 -+# CHECK-INST: vfrsqrte.d $vr1, $vr0 -+# CHECK-ENCODING: encoding: [0x01,0x28,0x9d,0x72] --- -2.20.1 - - -From 57eaecf7bdb7a7502580076b365b4f70dde1185d Mon Sep 17 00:00:00 2001 -From: Ami-zhang -Date: Tue, 23 Jan 2024 14:24:58 +0800 -Subject: [PATCH 09/12] [LoongArch] Add definitions and feature 'frecipe' for - FP approximation intrinsics/builtins (#78962) - -This PR adds definitions and 'frecipe' feature for FP approximation -intrinsics/builtins. In additions, this adds and complements relative -testcases. 
- -(cherry picked from commit fcb8342a219ada8ec641790a4c8a9f969d7d64ee) ---- - llvm/include/llvm/IR/IntrinsicsLoongArch.td | 13 ++++++++++ - llvm/lib/Target/LoongArch/LoongArch.td | 7 +++++ - .../LoongArch/LoongArchFloat32InstrInfo.td | 6 +++++ - .../LoongArch/LoongArchFloat64InstrInfo.td | 6 +++++ - .../LoongArch/LoongArchLASXInstrInfo.td | 10 +++++++ - .../Target/LoongArch/LoongArchLSXInstrInfo.td | 10 +++++++ - .../lib/Target/LoongArch/LoongArchSubtarget.h | 2 ++ - .../LoongArch/intrinsic-frecipe-dbl.ll | 26 +++++++++++++++++++ - .../LoongArch/intrinsic-frecipe-flt.ll | 26 +++++++++++++++++++ - .../LoongArch/lasx/intrinsic-frecipe.ll | 26 +++++++++++++++++++ - .../LoongArch/lasx/intrinsic-frsqrte.ll | 26 +++++++++++++++++++ - .../LoongArch/lsx/intrinsic-frecipe.ll | 26 +++++++++++++++++++ - .../LoongArch/lsx/intrinsic-frsqrte.ll | 26 +++++++++++++++++++ - 13 files changed, 210 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/intrinsic-frecipe-dbl.ll - create mode 100644 llvm/test/CodeGen/LoongArch/intrinsic-frecipe-flt.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecipe.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrte.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecipe.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrte.ll - -diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td -index 685deaec7709..9002076e7aec 100644 ---- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td -+++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td -@@ -122,6 +122,15 @@ def int_loongarch_lddir_d : BaseInt<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], - [ImmArg>]>; - def int_loongarch_ldpte_d : BaseInt<[], [llvm_i64_ty, llvm_i64_ty], - [ImmArg>]>; -+ -+def int_loongarch_frecipe_s : BaseInt<[llvm_float_ty], [llvm_float_ty], -+ [IntrNoMem]>; -+def int_loongarch_frecipe_d : BaseInt<[llvm_double_ty], [llvm_double_ty], -+ [IntrNoMem]>; -+def int_loongarch_frsqrte_s : BaseInt<[llvm_float_ty], [llvm_float_ty], -+ [IntrNoMem]>; -+def int_loongarch_frsqrte_d : BaseInt<[llvm_double_ty], [llvm_double_ty], -+ [IntrNoMem]>; - } // TargetPrefix = "loongarch" - - /// Vector intrinsic -@@ -527,10 +536,12 @@ foreach inst = ["vfmadd_d", "vfmsub_d", "vfnmadd_d", "vfnmsub_d"] in - [IntrNoMem]>; - - foreach inst = ["vflogb_s", "vfsqrt_s", "vfrecip_s", "vfrsqrt_s", "vfrint_s", -+ "vfrecipe_s", "vfrsqrte_s", - "vfrintrne_s", "vfrintrz_s", "vfrintrp_s", "vfrintrm_s"] in - def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], [llvm_v4f32_ty], - [IntrNoMem]>; - foreach inst = ["vflogb_d", "vfsqrt_d", "vfrecip_d", "vfrsqrt_d", "vfrint_d", -+ "vfrecipe_d", "vfrsqrte_d", - "vfrintrne_d", "vfrintrz_d", "vfrintrp_d", "vfrintrm_d"] in - def int_loongarch_lsx_#inst : VecInt<[llvm_v2f64_ty], [llvm_v2f64_ty], - [IntrNoMem]>; -@@ -1044,10 +1055,12 @@ foreach inst = ["xvfmadd_d", "xvfmsub_d", "xvfnmadd_d", "xvfnmsub_d"] in - [IntrNoMem]>; - - foreach inst = ["xvflogb_s", "xvfsqrt_s", "xvfrecip_s", "xvfrsqrt_s", "xvfrint_s", -+ "xvfrecipe_s", "xvfrsqrte_s", - "xvfrintrne_s", "xvfrintrz_s", "xvfrintrp_s", "xvfrintrm_s"] in - def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], [llvm_v8f32_ty], - [IntrNoMem]>; - foreach inst = ["xvflogb_d", "xvfsqrt_d", "xvfrecip_d", "xvfrsqrt_d", "xvfrint_d", -+ "xvfrecipe_d", "xvfrsqrte_d", - "xvfrintrne_d", "xvfrintrz_d", "xvfrintrp_d", "xvfrintrm_d"] in - def int_loongarch_lasx_#inst : VecInt<[llvm_v4f64_ty], [llvm_v4f64_ty], - [IntrNoMem]>; -diff --git 
a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td -index 2a4c991a43b0..5573e5415d26 100644 ---- a/llvm/lib/Target/LoongArch/LoongArch.td -+++ b/llvm/lib/Target/LoongArch/LoongArch.td -@@ -110,6 +110,13 @@ def FeatureAutoVec - : SubtargetFeature<"auto-vec", "HasExpAutoVec", "true", - "Experimental auto vectorization">; - -+// Floating point approximation operation -+def FeatureFrecipe -+ : SubtargetFeature<"frecipe", "HasFrecipe", "true", -+ "Support frecipe.{s/d} and frsqrte.{s/d} instructions.">; -+def HasFrecipe : Predicate<"Subtarget->hasFrecipe()">; -+ -+ - //===----------------------------------------------------------------------===// - // Registers, instruction descriptions ... - //===----------------------------------------------------------------------===// -diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td -index f30837912e75..e27896768818 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td -@@ -281,6 +281,12 @@ def : Pat<(loongarch_ftint FPR32:$src), (FTINTRZ_W_S FPR32:$src)>; - // FP reciprocal operation - def : Pat<(fdiv fpimm1, FPR32:$src), (FRECIP_S $src)>; - -+let Predicates = [HasFrecipe] in { -+// FP approximate reciprocal operation -+def : Pat<(int_loongarch_frecipe_s FPR32:$src), (FRECIPE_S FPR32:$src)>; -+def : Pat<(int_loongarch_frsqrte_s FPR32:$src), (FRSQRTE_S FPR32:$src)>; -+} -+ - // fmadd.s: fj * fk + fa - def : Pat<(fma FPR32:$fj, FPR32:$fk, FPR32:$fa), (FMADD_S $fj, $fk, $fa)>; - -diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td -index 0ea4c564b045..26bed67ac222 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td -@@ -242,6 +242,12 @@ def : Pat<(f64 (fpextend FPR32:$src)), (FCVT_D_S FPR32:$src)>; - // FP reciprocal operation - def : Pat<(fdiv fpimm1, FPR64:$src), (FRECIP_D $src)>; - -+let Predicates = [HasFrecipe] in { -+// FP approximate reciprocal operation -+def : Pat<(int_loongarch_frecipe_d FPR64:$src), (FRECIPE_D FPR64:$src)>; -+def : Pat<(int_loongarch_frsqrte_d FPR64:$src), (FRSQRTE_D FPR64:$src)>; -+} -+ - // fmadd.d: fj * fk + fa - def : Pat<(fma FPR64:$fj, FPR64:$fk, FPR64:$fa), (FMADD_D $fj, $fk, $fa)>; - -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index 454915ac8c0a..6f1969bf8cae 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -2080,6 +2080,16 @@ foreach Inst = ["XVFLOGB_D", "XVFCLASS_D", "XVFSQRT_D", "XVFRECIP_D", "XVFRSQRT_ - def : Pat<(deriveLASXIntrinsic.ret (v4f64 LASX256:$xj)), - (!cast(Inst) LASX256:$xj)>; - -+// 256-Bit vector FP approximate reciprocal operation -+let Predicates = [HasFrecipe] in { -+foreach Inst = ["XVFRECIPE_S", "XVFRSQRTE_S"] in -+ def : Pat<(deriveLASXIntrinsic.ret (v8f32 LASX256:$xj)), -+ (!cast(Inst) LASX256:$xj)>; -+foreach Inst = ["XVFRECIPE_D", "XVFRSQRTE_D"] in -+ def : Pat<(deriveLASXIntrinsic.ret (v4f64 LASX256:$xj)), -+ (!cast(Inst) LASX256:$xj)>; -+} -+ - def : Pat<(int_loongarch_lasx_xvpickve_w_f v8f32:$xj, timm:$imm), - (XVPICKVE_W v8f32:$xj, (to_valid_timm timm:$imm))>; - def : Pat<(int_loongarch_lasx_xvpickve_d_f v4f64:$xj, timm:$imm), -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td 
b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index 6d60d7074ec3..0580683c3ce3 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -2195,6 +2195,16 @@ foreach Inst = ["VFLOGB_D", "VFCLASS_D", "VFSQRT_D", "VFRECIP_D", "VFRSQRT_D", - def : Pat<(deriveLSXIntrinsic.ret (v2f64 LSX128:$vj)), - (!cast(Inst) LSX128:$vj)>; - -+// 128-Bit vector FP approximate reciprocal operation -+let Predicates = [HasFrecipe] in { -+foreach Inst = ["VFRECIPE_S", "VFRSQRTE_S"] in -+ def : Pat<(deriveLSXIntrinsic.ret (v4f32 LSX128:$vj)), -+ (!cast(Inst) LSX128:$vj)>; -+foreach Inst = ["VFRECIPE_D", "VFRSQRTE_D"] in -+ def : Pat<(deriveLSXIntrinsic.ret (v2f64 LSX128:$vj)), -+ (!cast(Inst) LSX128:$vj)>; -+} -+ - // load - def : Pat<(int_loongarch_lsx_vld GPR:$rj, timm:$imm), - (VLD GPR:$rj, (to_valid_timm timm:$imm))>; -diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h -index 174e4cba8326..11c0b39e176e 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h -+++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h -@@ -45,6 +45,7 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo { - bool HasUAL = false; - bool HasLinkerRelax = false; - bool HasExpAutoVec = false; -+ bool HasFrecipe = false; - unsigned GRLen = 32; - MVT GRLenVT = MVT::i32; - LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown; -@@ -104,6 +105,7 @@ public: - bool hasUAL() const { return HasUAL; } - bool hasLinkerRelax() const { return HasLinkerRelax; } - bool hasExpAutoVec() const { return HasExpAutoVec; } -+ bool hasFrecipe() const { return HasFrecipe; } - MVT getGRLenVT() const { return GRLenVT; } - unsigned getGRLen() const { return GRLen; } - LoongArchABI::ABI getTargetABI() const { return TargetABI; } -diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-frecipe-dbl.ll b/llvm/test/CodeGen/LoongArch/intrinsic-frecipe-dbl.ll -new file mode 100644 -index 000000000000..9f572500caa0 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/intrinsic-frecipe-dbl.ll -@@ -0,0 +1,26 @@ -+; RUN: llc --mtriple=loongarch32 --mattr=+d,+frecipe < %s | FileCheck %s -+; RUN: llc --mtriple=loongarch64 --mattr=+d,+frecipe < %s | FileCheck %s -+ -+declare double @llvm.loongarch.frecipe.d(double) -+ -+define double @frecipe_d(double %a) { -+; CHECK-LABEL: frecipe_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: frecipe.d $fa0, $fa0 -+; CHECK-NEXT: ret -+entry: -+ %res = call double @llvm.loongarch.frecipe.d(double %a) -+ ret double %res -+} -+ -+declare double @llvm.loongarch.frsqrte.d(double) -+ -+define double @frsqrte_d(double %a) { -+; CHECK-LABEL: frsqrte_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: frsqrte.d $fa0, $fa0 -+; CHECK-NEXT: ret -+entry: -+ %res = call double @llvm.loongarch.frsqrte.d(double %a) -+ ret double %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-frecipe-flt.ll b/llvm/test/CodeGen/LoongArch/intrinsic-frecipe-flt.ll -new file mode 100644 -index 000000000000..0b2029f2e44a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/intrinsic-frecipe-flt.ll -@@ -0,0 +1,26 @@ -+; RUN: llc --mtriple=loongarch32 --mattr=+f,+frecipe < %s | FileCheck %s -+; RUN: llc --mtriple=loongarch64 --mattr=+f,+frecipe < %s | FileCheck %s -+ -+declare float @llvm.loongarch.frecipe.s(float) -+ -+define float @frecipe_s(float %a) { -+; CHECK-LABEL: frecipe_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: frecipe.s $fa0, $fa0 -+; CHECK-NEXT: ret -+entry: -+ %res = call float @llvm.loongarch.frecipe.s(float %a) -+ 
ret float %res -+} -+ -+declare float @llvm.loongarch.frsqrte.s(float) -+ -+define float @frsqrte_s(float %a) { -+; CHECK-LABEL: frsqrte_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: frsqrte.s $fa0, $fa0 -+; CHECK-NEXT: ret -+entry: -+ %res = call float @llvm.loongarch.frsqrte.s(float %a) -+ ret float %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecipe.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecipe.ll -new file mode 100644 -index 000000000000..215436823af8 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecipe.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx,+frecipe < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfrecipe.s(<8 x float>) -+ -+define <8 x float> @lasx_xvfrecipe_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvfrecipe_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrecipe.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfrecipe.s(<8 x float> %va) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfrecipe.d(<4 x double>) -+ -+define <4 x double> @lasx_xvfrecipe_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvfrecipe_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrecipe.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfrecipe.d(<4 x double> %va) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrte.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrte.ll -new file mode 100644 -index 000000000000..ad36c3aa5c29 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrte.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx,+frecipe < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfrsqrte.s(<8 x float>) -+ -+define <8 x float> @lasx_xvfrsqrte_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvfrsqrte_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrsqrte.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfrsqrte.s(<8 x float> %va) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfrsqrte.d(<4 x double>) -+ -+define <4 x double> @lasx_xvfrsqrte_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvfrsqrte_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrsqrte.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfrsqrte.d(<4 x double> %va) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecipe.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecipe.ll -new file mode 100644 -index 000000000000..1b7a97d9f972 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecipe.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx,+frecipe < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfrecipe.s(<4 x float>) -+ -+define <4 x float> @lsx_vfrecipe_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vfrecipe_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrecipe.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfrecipe.s(<4 x float> %va) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> 
@llvm.loongarch.lsx.vfrecipe.d(<2 x double>) -+ -+define <2 x double> @lsx_vfrecipe_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vfrecipe_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrecipe.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfrecipe.d(<2 x double> %va) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrte.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrte.ll -new file mode 100644 -index 000000000000..3cd6c78e87d7 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrte.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx,+frecipe < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfrsqrte.s(<4 x float>) -+ -+define <4 x float> @lsx_vfrsqrte_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vfrsqrte_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrsqrte.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfrsqrte.s(<4 x float> %va) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfrsqrte.d(<2 x double>) -+ -+define <2 x double> @lsx_vfrsqrte_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vfrsqrte_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrsqrte.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfrsqrte.d(<2 x double> %va) -+ ret <2 x double> %res -+} --- -2.20.1 - - -From 6f9531b069971dc0f5c6b28bd6a6754c1b5fde72 Mon Sep 17 00:00:00 2001 -From: Ami-zhang -Date: Tue, 23 Jul 2024 14:03:28 +0800 -Subject: [PATCH 10/12] [LoongArch] Support -march=la64v1.0 and -march=la64v1.1 - (#100057) - -The newly added strings `la64v1.0` and `la64v1.1` in `-march` are as -described in LoongArch toolchains conventions (see [1]). - -The target-cpu/feature attributes are forwarded to compiler when -specifying particular `-march` parameter. The default cpu `loongarch64` -is returned when archname is `la64v1.0` or `la64v1.1`. - -In addition, this commit adds `la64v1.0`/`la64v1.1` to -"__loongarch_arch" and adds definition for macro "__loongarch_frecipe". - -[1]: https://github.com/loongson/la-toolchain-conventions - -(cherry picked from commit 5a1b9896ad5a7dcd25a1cc7a4d3fd44155e4b22d) ---- - llvm/lib/TargetParser/LoongArchTargetParser.cpp | 11 +++++++++++ - 1 file changed, 11 insertions(+) - -diff --git a/llvm/lib/TargetParser/LoongArchTargetParser.cpp b/llvm/lib/TargetParser/LoongArchTargetParser.cpp -index 772d24c5ce3d..8e86d18de2ad 100644 ---- a/llvm/lib/TargetParser/LoongArchTargetParser.cpp -+++ b/llvm/lib/TargetParser/LoongArchTargetParser.cpp -@@ -44,6 +44,17 @@ bool LoongArch::getArchFeatures(StringRef Arch, - return true; - } - } -+ -+ if (Arch == "la64v1.0" || Arch == "la64v1.1") { -+ Features.push_back("+64bit"); -+ Features.push_back("+d"); -+ Features.push_back("+lsx"); -+ Features.push_back("+ual"); -+ if (Arch == "la64v1.1") -+ Features.push_back("+frecipe"); -+ return true; -+ } -+ - return false; - } - --- -2.20.1 - - -From 6094875aa6aab1e28a096294783cada0243e95d5 Mon Sep 17 00:00:00 2001 -From: Ami-zhang -Date: Tue, 23 Jul 2024 15:14:20 +0800 -Subject: [PATCH 11/12] [LoongArch] Support la664 (#100068) - -A new ProcessorModel called `la664` is defined in LoongArch.td to -support `-march/-mtune=la664`. 
- -(cherry picked from commit fcec298087dba0c83f6d0bbafd6cd934c42cbf82) ---- - llvm/include/llvm/TargetParser/LoongArchTargetParser.def | 2 ++ - llvm/include/llvm/TargetParser/LoongArchTargetParser.h | 3 +++ - llvm/lib/Target/LoongArch/LoongArch.td | 7 +++++++ - llvm/lib/TargetParser/Host.cpp | 2 ++ - llvm/test/CodeGen/LoongArch/cpus.ll | 5 +++++ - 5 files changed, 19 insertions(+) - -diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.def b/llvm/include/llvm/TargetParser/LoongArchTargetParser.def -index b20d124953f8..101a48cbd539 100644 ---- a/llvm/include/llvm/TargetParser/LoongArchTargetParser.def -+++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.def -@@ -10,6 +10,7 @@ LOONGARCH_FEATURE("+lasx", FK_LASX) - LOONGARCH_FEATURE("+lbt", FK_LBT) - LOONGARCH_FEATURE("+lvz", FK_LVZ) - LOONGARCH_FEATURE("+ual", FK_UAL) -+LOONGARCH_FEATURE("+frecipe", FK_FRECIPE) - - #undef LOONGARCH_FEATURE - -@@ -19,5 +20,6 @@ LOONGARCH_FEATURE("+ual", FK_UAL) - - LOONGARCH_ARCH("loongarch64", AK_LOONGARCH64, FK_64BIT | FK_FP32 | FK_FP64 | FK_UAL) - LOONGARCH_ARCH("la464", AK_LA464, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL) -+LOONGARCH_ARCH("la664", AK_LA664, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL | FK_FRECIPE) - - #undef LOONGARCH_ARCH -diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h -index 028844187584..c0bb15a5163b 100644 ---- a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h -+++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h -@@ -46,6 +46,9 @@ enum FeatureKind : uint32_t { - - // Allow memory accesses to be unaligned. - FK_UAL = 1 << 8, -+ -+ // Floating-point approximate reciprocal instructions are available. -+ FK_FRECIPE = 1 << 9, - }; - - struct FeatureInfo { -diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td -index 5573e5415d26..b5cd5bb0f8a4 100644 ---- a/llvm/lib/Target/LoongArch/LoongArch.td -+++ b/llvm/lib/Target/LoongArch/LoongArch.td -@@ -147,6 +147,13 @@ def : ProcessorModel<"la464", NoSchedModel, [Feature64Bit, - FeatureExtLVZ, - FeatureExtLBT]>; - -+def : ProcessorModel<"la664", NoSchedModel, [Feature64Bit, -+ FeatureUAL, -+ FeatureExtLASX, -+ FeatureExtLVZ, -+ FeatureExtLBT, -+ FeatureFrecipe]>; -+ - //===----------------------------------------------------------------------===// - // Define the LoongArch target. - //===----------------------------------------------------------------------===// -diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp -index 8b23be02edc0..87e3e0b434d5 100644 ---- a/llvm/lib/TargetParser/Host.cpp -+++ b/llvm/lib/TargetParser/Host.cpp -@@ -1469,6 +1469,8 @@ StringRef sys::getHostCPUName() { - switch (processor_id & 0xf000) { - case 0xc000: // Loongson 64bit, 4-issue - return "la464"; -+ case 0xd000: // Loongson 64bit, 6-issue -+ return "la664"; - // TODO: Others. 
- default: - break; -diff --git a/llvm/test/CodeGen/LoongArch/cpus.ll b/llvm/test/CodeGen/LoongArch/cpus.ll -index 35945ae4de71..087cf887b813 100644 ---- a/llvm/test/CodeGen/LoongArch/cpus.ll -+++ b/llvm/test/CodeGen/LoongArch/cpus.ll -@@ -3,6 +3,7 @@ - - ; RUN: llc < %s --mtriple=loongarch64 --mcpu=loongarch64 2>&1 | FileCheck %s - ; RUN: llc < %s --mtriple=loongarch64 --mcpu=la464 2>&1 | FileCheck %s -+; RUN: llc < %s --mtriple=loongarch64 --mcpu=la664 2>&1 | FileCheck %s - ; RUN: llc < %s --mtriple=loongarch64 2>&1 | FileCheck %s - - ; CHECK-NOT: {{.*}} is not a recognized processor for this target -@@ -18,3 +19,7 @@ define void @tune_cpu_loongarch64() "tune-cpu"="loongarch64" { - define void @tune_cpu_la464() "tune-cpu"="la464" { - ret void - } -+ -+define void @tune_cpu_la664() "tune-cpu"="la664" { -+ ret void -+} --- -2.20.1 - - -From f06fec7597485a8d90aa81e3c65abea1bdeeb90b Mon Sep 17 00:00:00 2001 -From: Zhaoxin Yang -Date: Tue, 23 Jul 2024 15:19:00 +0800 -Subject: [PATCH 12/12] [LoongArch] Remove experimental `auto-vec` feature. - (#100070) - -Currently, automatic vectorization will be enabled with `-mlsx/-mlasx` -enabled. - -(cherry picked from commit 89d1eb67342d75d1de8d210157fdeaeb6a4724b6) ---- - llvm/lib/Target/LoongArch/LoongArch.td | 4 ---- - llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp | 2 -- - llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll | 2 +- - 3 files changed, 1 insertion(+), 7 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td -index b5cd5bb0f8a4..5f85cace71af 100644 ---- a/llvm/lib/Target/LoongArch/LoongArch.td -+++ b/llvm/lib/Target/LoongArch/LoongArch.td -@@ -105,10 +105,6 @@ def FeatureUAL - def FeatureRelax - : SubtargetFeature<"relax", "HasLinkerRelax", "true", - "Enable Linker relaxation">; --// Experimental auto vectorization --def FeatureAutoVec -- : SubtargetFeature<"auto-vec", "HasExpAutoVec", "true", -- "Experimental auto vectorization">; - - // Floating point approximation operation - def FeatureFrecipe -diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp -index d47dded9ea6e..7961bb141e64 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp -@@ -26,8 +26,6 @@ TypeSize LoongArchTTIImpl::getRegisterBitWidth( - case TargetTransformInfo::RGK_Scalar: - return TypeSize::getFixed(ST->is64Bit() ? 64 : 32); - case TargetTransformInfo::RGK_FixedWidthVector: -- if (!ST->hasExpAutoVec()) -- return DefSize; - if (ST->hasExtLASX()) - return TypeSize::getFixed(256); - if (ST->hasExtLSX()) -diff --git a/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll b/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll -index a8ac2411dd82..6ab300859f9d 100644 ---- a/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll -+++ b/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll -@@ -1,5 +1,5 @@ - ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 --; RUN: opt < %s -passes=loop-vectorize -mtriple loongarch64-linux-gnu -mattr=+lasx,+auto-vec -S | FileCheck %s -+; RUN: opt < %s -passes=loop-vectorize -mtriple loongarch64-linux-gnu -mattr=+lasx -S | FileCheck %s - - ;; This is a collection of tests whose only purpose is to show changes in the - ;; default configuration. 
Please keep these tests minimal - if you're testing --- -2.20.1 - diff --git a/0016-LoongArch-NFC-Pre-commit-MCInstrAnalysis-tests-for-i.patch b/0016-LoongArch-NFC-Pre-commit-MCInstrAnalysis-tests-for-i.patch new file mode 100644 index 0000000..974f829 --- /dev/null +++ b/0016-LoongArch-NFC-Pre-commit-MCInstrAnalysis-tests-for-i.patch @@ -0,0 +1,89 @@ +From 5ac9529fad623693c70a0e47cd52e1b48243fc7d Mon Sep 17 00:00:00 2001 +From: ZhaoQi +Date: Wed, 15 Nov 2023 11:12:30 +0800 +Subject: [PATCH 16/27] [LoongArch][NFC] Pre-commit MCInstrAnalysis tests for + instruction 'b' (#71903) + +The tests for 'b' which commented with FIXME are incorrect, the +following patch will fix it. + +(cherry picked from commit f6c4bb07eaa94bcd5d02ba7a46850225b6ed50d4) +--- + .../Target/LoongArch/MCInstrAnalysisTest.cpp | 18 ++++++++++++++++++ + 1 file changed, 18 insertions(+) + +diff --git a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp +index 6a208d274a0d..6e1919fc2261 100644 +--- a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp ++++ b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp +@@ -50,6 +50,8 @@ static MCInst beq() { + .addImm(32); + } + ++static MCInst b() { return MCInstBuilder(LoongArch::B).addImm(32); } ++ + static MCInst bl() { return MCInstBuilder(LoongArch::BL).addImm(32); } + + static MCInst jirl(unsigned RD, unsigned RJ = LoongArch::R10) { +@@ -58,6 +60,7 @@ static MCInst jirl(unsigned RD, unsigned RJ = LoongArch::R10) { + + TEST_P(InstrAnalysisTest, IsTerminator) { + EXPECT_TRUE(Analysis->isTerminator(beq())); ++ EXPECT_TRUE(Analysis->isTerminator(b())); + EXPECT_FALSE(Analysis->isTerminator(bl())); + EXPECT_TRUE(Analysis->isTerminator(jirl(LoongArch::R0))); + EXPECT_FALSE(Analysis->isTerminator(jirl(LoongArch::R5))); +@@ -65,6 +68,7 @@ TEST_P(InstrAnalysisTest, IsTerminator) { + + TEST_P(InstrAnalysisTest, IsCall) { + EXPECT_FALSE(Analysis->isCall(beq())); ++ EXPECT_FALSE(Analysis->isCall(b())); + EXPECT_TRUE(Analysis->isCall(bl())); + EXPECT_TRUE(Analysis->isCall(jirl(LoongArch::R1))); + EXPECT_FALSE(Analysis->isCall(jirl(LoongArch::R0))); +@@ -72,6 +76,7 @@ TEST_P(InstrAnalysisTest, IsCall) { + + TEST_P(InstrAnalysisTest, IsReturn) { + EXPECT_FALSE(Analysis->isReturn(beq())); ++ EXPECT_FALSE(Analysis->isReturn(b())); + EXPECT_FALSE(Analysis->isReturn(bl())); + EXPECT_TRUE(Analysis->isReturn(jirl(LoongArch::R0, LoongArch::R1))); + EXPECT_FALSE(Analysis->isReturn(jirl(LoongArch::R0))); +@@ -80,14 +85,26 @@ TEST_P(InstrAnalysisTest, IsReturn) { + + TEST_P(InstrAnalysisTest, IsBranch) { + EXPECT_TRUE(Analysis->isBranch(beq())); ++ EXPECT_TRUE(Analysis->isBranch(b())); + EXPECT_FALSE(Analysis->isBranch(bl())); + EXPECT_TRUE(Analysis->isBranch(jirl(LoongArch::R0))); + EXPECT_FALSE(Analysis->isBranch(jirl(LoongArch::R1))); + EXPECT_FALSE(Analysis->isBranch(jirl(LoongArch::R0, LoongArch::R1))); + } + ++TEST_P(InstrAnalysisTest, IsConditionalBranch) { ++ EXPECT_TRUE(Analysis->isConditionalBranch(beq())); ++ // FIXME: Instr 'b' is not a ConditionalBranch, so the analysis here is ++ // wrong. The following patch will fix it. ++ EXPECT_TRUE(Analysis->isConditionalBranch(b())); ++ EXPECT_FALSE(Analysis->isConditionalBranch(bl())); ++} ++ + TEST_P(InstrAnalysisTest, IsUnconditionalBranch) { + EXPECT_FALSE(Analysis->isUnconditionalBranch(beq())); ++ // FIXME: Instr 'b' is an UnconditionalBranch, so the analysis here is ++ // wrong. The following patch will fix it. 
++ EXPECT_FALSE(Analysis->isUnconditionalBranch(b())); + EXPECT_FALSE(Analysis->isUnconditionalBranch(bl())); + EXPECT_TRUE(Analysis->isUnconditionalBranch(jirl(LoongArch::R0))); + EXPECT_FALSE(Analysis->isUnconditionalBranch(jirl(LoongArch::R1))); +@@ -97,6 +114,7 @@ TEST_P(InstrAnalysisTest, IsUnconditionalBranch) { + + TEST_P(InstrAnalysisTest, IsIndirectBranch) { + EXPECT_FALSE(Analysis->isIndirectBranch(beq())); ++ EXPECT_FALSE(Analysis->isIndirectBranch(b())); + EXPECT_FALSE(Analysis->isIndirectBranch(bl())); + EXPECT_TRUE(Analysis->isIndirectBranch(jirl(LoongArch::R0))); + EXPECT_FALSE(Analysis->isIndirectBranch(jirl(LoongArch::R1))); +-- +2.20.1 + diff --git a/0016-LoongArch-Pass-OptLevel-to-LoongArchDAGToDAGISel-cor.patch b/0016-LoongArch-Pass-OptLevel-to-LoongArchDAGToDAGISel-cor.patch deleted file mode 100644 index 62ee231..0000000 --- a/0016-LoongArch-Pass-OptLevel-to-LoongArchDAGToDAGISel-cor.patch +++ /dev/null @@ -1,131 +0,0 @@ -From 2dd8460d8a363039e21c48ff65031c3fa1a92c0e Mon Sep 17 00:00:00 2001 -From: Weining Lu -Date: Sat, 7 Jun 2025 11:45:39 +0800 -Subject: [PATCH 2/3] [LoongArch] Pass OptLevel to LoongArchDAGToDAGISel - correctly - -Like many other targets did. And see RISCV for similar fix. - -Fix https://github.com/llvm/llvm-project/issues/143239 - -(cherry picked from commit 90a52f4942961a5c32afc69d69470c6b7e5bcb8a) ---- - llvm/lib/Target/LoongArch/LoongArch.h | 3 ++- - llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp | 5 +++-- - llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h | 5 +++-- - llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp | 2 +- - llvm/test/CodeGen/LoongArch/O0-pipeline.ll | 8 -------- - llvm/test/CodeGen/LoongArch/isel-optnone.ll | 7 ++----- - llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll | 1 + - 7 files changed, 12 insertions(+), 19 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArch.h b/llvm/lib/Target/LoongArch/LoongArch.h -index 09ca089c9115..a2267024a976 100644 ---- a/llvm/lib/Target/LoongArch/LoongArch.h -+++ b/llvm/lib/Target/LoongArch/LoongArch.h -@@ -34,7 +34,8 @@ bool lowerLoongArchMachineOperandToMCOperand(const MachineOperand &MO, - const AsmPrinter &AP); - - FunctionPass *createLoongArchExpandAtomicPseudoPass(); --FunctionPass *createLoongArchISelDag(LoongArchTargetMachine &TM); -+FunctionPass *createLoongArchISelDag(LoongArchTargetMachine &TM, -+ CodeGenOpt::Level OptLevel); - FunctionPass *createLoongArchPreRAExpandPseudoPass(); - FunctionPass *createLoongArchExpandPseudoPass(); - void initializeLoongArchDAGToDAGISelPass(PassRegistry &); -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp -index 01b2f720f902..535405022eda 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp -@@ -412,6 +412,7 @@ bool LoongArchDAGToDAGISel::selectVSplatUimmPow2(SDValue N, - - // This pass converts a legalized DAG into a LoongArch-specific DAG, ready - // for instruction scheduling. 
--FunctionPass *llvm::createLoongArchISelDag(LoongArchTargetMachine &TM) { -- return new LoongArchDAGToDAGISel(TM); -+FunctionPass *llvm::createLoongArchISelDag(LoongArchTargetMachine &TM, -+ CodeGenOpt::Level OptLevel) { -+ return new LoongArchDAGToDAGISel(TM, OptLevel); - } -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h -index 5e3d6ccc3755..8149e0432126 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h -+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h -@@ -28,8 +28,9 @@ public: - - LoongArchDAGToDAGISel() = delete; - -- explicit LoongArchDAGToDAGISel(LoongArchTargetMachine &TM) -- : SelectionDAGISel(ID, TM) {} -+ explicit LoongArchDAGToDAGISel(LoongArchTargetMachine &TM, -+ CodeGenOpt::Level OptLevel) -+ : SelectionDAGISel(ID, TM, OptLevel) {} - - bool runOnMachineFunction(MachineFunction &MF) override { - Subtarget = &MF.getSubtarget(); -diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp -index 0efc5e6ebb99..c683902331d9 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp -@@ -167,7 +167,7 @@ void LoongArchPassConfig::addIRPasses() { - } - - bool LoongArchPassConfig::addInstSelector() { -- addPass(createLoongArchISelDag(getLoongArchTargetMachine())); -+ addPass(createLoongArchISelDag(getLoongArchTargetMachine(), getOptLevel())); - - return false; - } -diff --git a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll -index 84d235d78eb9..9786390687d6 100644 ---- a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll -+++ b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll -@@ -35,15 +35,7 @@ - ; CHECK-NEXT: Safe Stack instrumentation pass - ; CHECK-NEXT: Insert stack protectors - ; CHECK-NEXT: Module Verifier --; CHECK-NEXT: Dominator Tree Construction --; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) --; CHECK-NEXT: Function Alias Analysis Results --; CHECK-NEXT: Natural Loop Information --; CHECK-NEXT: Post-Dominator Tree Construction --; CHECK-NEXT: Branch Probability Analysis - ; CHECK-NEXT: Assignment Tracking Analysis --; CHECK-NEXT: Lazy Branch Probability Analysis --; CHECK-NEXT: Lazy Block Frequency Analysis - ; CHECK-NEXT: LoongArch DAG->DAG Pattern Instruction Selection - ; CHECK-NEXT: Finalize ISel and expand pseudo-instructions - ; CHECK-NEXT: Local Stack Slot Allocation -diff --git a/llvm/test/CodeGen/LoongArch/isel-optnone.ll b/llvm/test/CodeGen/LoongArch/isel-optnone.ll -index d44f1405d0c1..4d2528a3148a 100644 ---- a/llvm/test/CodeGen/LoongArch/isel-optnone.ll -+++ b/llvm/test/CodeGen/LoongArch/isel-optnone.ll -@@ -2,11 +2,8 @@ - ; RUN: llc %s -O0 -mtriple=loongarch64 -o /dev/null -debug-only=isel 2>&1 | FileCheck %s - - define void @fooOptnone() #0 { --; CHECK: Changing optimization level for Function fooOptnone --; CHECK: Before: -O2 ; After: -O0 -- --; CHECK: Restoring optimization level for Function fooOptnone --; CHECK: Before: -O0 ; After: -O2 -+; CHECK-NOT: Changing optimization level for Function fooOptnone -+; CHECK-NOT: Restoring optimization level for Function fooOptnone - ret void - } - -diff --git a/llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll b/llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll -index 092da5aba2d9..ff242c2a0f98 100644 ---- a/llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll -+++ b/llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll -@@ -39,6 +39,7 @@ define dso_local ptr 
@f(i32 noundef signext %i) "frame-pointer"="all" { - ; CHECK-NEXT: b .LBB0_3 - ; CHECK-NEXT: .LBB0_3: # %if.end - ; CHECK-NEXT: ld.d $a0, $fp, -48 # 8-byte Folded Reload -+; CHECK-NEXT: addi.w $a0, $a0, 0 - ; CHECK-NEXT: ori $a1, $zero, 1 - ; CHECK-NEXT: bne $a0, $a1, .LBB0_6 - ; CHECK-NEXT: b .LBB0_4 --- -2.20.1 - diff --git a/0017-LoongArch-CodeGen-Add-LSX-builtin-testcases.patch b/0017-LoongArch-CodeGen-Add-LSX-builtin-testcases.patch new file mode 100644 index 0000000..3faf10b --- /dev/null +++ b/0017-LoongArch-CodeGen-Add-LSX-builtin-testcases.patch @@ -0,0 +1,12430 @@ +From 26cf77602203853f56675ae6831a1d2837d03ba1 Mon Sep 17 00:00:00 2001 +From: chenli +Date: Fri, 27 Oct 2023 15:57:30 +0800 +Subject: [PATCH 17/42] [LoongArch][CodeGen] Add LSX builtin testcases + +(cherry picked from commit 673c530837faa5ddb45769ddee01d09e1f73d406) + +--- + .../LoongArch/lsx/builtin-alias-error.c | 1359 +++++ + .../CodeGen/LoongArch/lsx/builtin-alias.c | 4451 ++++++++++++++ + .../CodeGen/LoongArch/lsx/builtin-error.c | 1382 +++++ + clang/test/CodeGen/LoongArch/lsx/builtin.c | 5193 +++++++++++++++++ + 4 files changed, 12385 insertions(+) + create mode 100644 clang/test/CodeGen/LoongArch/lsx/builtin-alias-error.c + create mode 100644 clang/test/CodeGen/LoongArch/lsx/builtin-alias.c + create mode 100644 clang/test/CodeGen/LoongArch/lsx/builtin-error.c + create mode 100644 clang/test/CodeGen/LoongArch/lsx/builtin.c + +diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-alias-error.c b/clang/test/CodeGen/LoongArch/lsx/builtin-alias-error.c +new file mode 100644 +index 000000000000..69cf2254fdd7 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/lsx/builtin-alias-error.c +@@ -0,0 +1,1359 @@ ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -verify %s ++ ++#include ++ ++v16i8 vslli_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vslli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lsx_vslli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lsx_vslli_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vslli_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vslli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vslli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vslli_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vslli_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vslli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vslli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vslli_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vslli_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vslli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vslli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vslli_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrai_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vsrai_b(_1, -1); // expected-error {{argument value 4294967295 is outside the 
valid range [0, 7]}} ++ res |= __lsx_vsrai_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lsx_vsrai_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrai_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vsrai_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vsrai_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vsrai_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrai_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vsrai_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vsrai_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vsrai_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrai_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vsrai_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vsrai_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vsrai_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrari_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vsrari_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lsx_vsrari_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lsx_vsrari_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrari_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vsrari_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vsrari_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vsrari_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrari_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vsrari_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vsrari_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vsrari_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrari_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vsrari_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vsrari_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vsrari_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrli_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vsrli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lsx_vsrli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lsx_vsrli_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 
vsrli_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vsrli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vsrli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vsrli_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrli_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vsrli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vsrli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vsrli_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrli_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vsrli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vsrli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vsrli_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrlri_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vsrlri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lsx_vsrlri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lsx_vsrlri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrlri_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vsrlri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vsrlri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vsrlri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrlri_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vsrlri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vsrlri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vsrlri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrlri_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vsrlri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vsrlri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vsrlri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vbitclri_b(v16u8 _1, int var) { ++ v16u8 res = __lsx_vbitclri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lsx_vbitclri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lsx_vbitclri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_b' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vbitclri_h(v8u16 _1, int var) { ++ v8u16 res = __lsx_vbitclri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vbitclri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ 
res |= __lsx_vbitclri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_h' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vbitclri_w(v4u32 _1, int var) { ++ v4u32 res = __lsx_vbitclri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vbitclri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vbitclri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_w' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vbitclri_d(v2u64 _1, int var) { ++ v2u64 res = __lsx_vbitclri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vbitclri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vbitclri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vbitseti_b(v16u8 _1, int var) { ++ v16u8 res = __lsx_vbitseti_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lsx_vbitseti_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lsx_vbitseti_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_b' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vbitseti_h(v8u16 _1, int var) { ++ v8u16 res = __lsx_vbitseti_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vbitseti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vbitseti_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_h' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vbitseti_w(v4u32 _1, int var) { ++ v4u32 res = __lsx_vbitseti_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vbitseti_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vbitseti_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_w' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vbitseti_d(v2u64 _1, int var) { ++ v2u64 res = __lsx_vbitseti_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vbitseti_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vbitseti_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vbitrevi_b(v16u8 _1, int var) { ++ v16u8 res = __lsx_vbitrevi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lsx_vbitrevi_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lsx_vbitrevi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_b' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vbitrevi_h(v8u16 _1, int var) { ++ v8u16 res = __lsx_vbitrevi_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vbitrevi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vbitrevi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_h' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vbitrevi_w(v4u32 _1, int var) { ++ 
v4u32 res = __lsx_vbitrevi_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vbitrevi_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vbitrevi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_w' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vbitrevi_d(v2u64 _1, int var) { ++ v2u64 res = __lsx_vbitrevi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vbitrevi_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vbitrevi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vaddi_bu(v16i8 _1, int var) { ++ v16i8 res = __lsx_vaddi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vaddi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vaddi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_bu' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vaddi_hu(v8i16 _1, int var) { ++ v8i16 res = __lsx_vaddi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vaddi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vaddi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_hu' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vaddi_wu(v4i32 _1, int var) { ++ v4i32 res = __lsx_vaddi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vaddi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vaddi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_wu' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vaddi_du(v2i64 _1, int var) { ++ v2i64 res = __lsx_vaddi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vaddi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vaddi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsubi_bu(v16i8 _1, int var) { ++ v16i8 res = __lsx_vsubi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vsubi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vsubi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_bu' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsubi_hu(v8i16 _1, int var) { ++ v8i16 res = __lsx_vsubi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vsubi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vsubi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_hu' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsubi_wu(v4i32 _1, int var) { ++ v4i32 res = __lsx_vsubi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vsubi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= 
__lsx_vsubi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_wu' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsubi_du(v2i64 _1, int var) { ++ v2i64 res = __lsx_vsubi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vsubi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vsubi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vmaxi_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vmaxi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vmaxi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vmaxi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vmaxi_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vmaxi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vmaxi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vmaxi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vmaxi_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vmaxi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vmaxi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vmaxi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vmaxi_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vmaxi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vmaxi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vmaxi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vmaxi_bu(v16u8 _1, int var) { ++ v16u8 res = __lsx_vmaxi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vmaxi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vmaxi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_bu' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vmaxi_hu(v8u16 _1, int var) { ++ v8u16 res = __lsx_vmaxi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vmaxi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vmaxi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_hu' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vmaxi_wu(v4u32 _1, int var) { ++ v4u32 res = __lsx_vmaxi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vmaxi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vmaxi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_wu' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vmaxi_du(v2u64 _1, int var) { ++ v2u64 res = __lsx_vmaxi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 
31]}} ++ res |= __lsx_vmaxi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vmaxi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vmini_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vmini_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vmini_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vmini_b(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vmini_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vmini_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vmini_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vmini_h(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_h' must be a constant integer}}} ++ return res; ++} ++ ++v4i32 vmini_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vmini_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vmini_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vmini_w(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vmini_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vmini_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vmini_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vmini_d(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vmini_bu(v16u8 _1, int var) { ++ v16u8 res = __lsx_vmini_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vmini_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vmini_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_bu' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vmini_hu(v8u16 _1, int var) { ++ v8u16 res = __lsx_vmini_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vmini_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vmini_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_hu' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vmini_wu(v4u32 _1, int var) { ++ v4u32 res = __lsx_vmini_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vmini_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vmini_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_wu' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vmini_du(v2u64 _1, int var) { ++ v2u64 res = __lsx_vmini_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vmini_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vmini_du(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vseqi_b(v16i8 _1, 
int var) { ++ v16i8 res = __lsx_vseqi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vseqi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vseqi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vseqi_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vseqi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vseqi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vseqi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vseqi_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vseqi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vseqi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vseqi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vseqi_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vseqi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vseqi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vseqi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vslti_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vslti_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vslti_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vslti_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vslti_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vslti_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vslti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vslti_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vslti_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vslti_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vslti_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vslti_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vslti_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vslti_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vslti_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vslti_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vslti_bu(v16u8 _1, int var) { ++ v16i8 res = __lsx_vslti_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vslti_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vslti_bu(_1, var); // expected-error {{argument to 
'__builtin_lsx_vslti_bu' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vslti_hu(v8u16 _1, int var) { ++ v8i16 res = __lsx_vslti_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vslti_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vslti_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_hu' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vslti_wu(v4u32 _1, int var) { ++ v4i32 res = __lsx_vslti_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vslti_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vslti_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_wu' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vslti_du(v2u64 _1, int var) { ++ v2i64 res = __lsx_vslti_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vslti_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vslti_du(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vslei_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vslei_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vslei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vslei_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vslei_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vslei_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vslei_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vslei_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vslei_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vslei_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vslei_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vslei_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vslei_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vslei_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vslei_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vslei_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vslei_bu(v16u8 _1, int var) { ++ v16i8 res = __lsx_vslei_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vslei_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vslei_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_bu' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vslei_hu(v8u16 _1, int var) { ++ v8i16 res = __lsx_vslei_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vslei_hu(_1, 32); // 
expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vslei_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_hu' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vslei_wu(v4u32 _1, int var) { ++ v4i32 res = __lsx_vslei_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vslei_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vslei_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_wu' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vslei_du(v2u64 _1, int var) { ++ v2i64 res = __lsx_vslei_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vslei_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vslei_du(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsat_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vsat_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lsx_vsat_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lsx_vsat_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsat_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vsat_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vsat_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vsat_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsat_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vsat_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vsat_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vsat_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsat_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vsat_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vsat_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vsat_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vsat_bu(v16u8 _1, int var) { ++ v16u8 res = __lsx_vsat_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lsx_vsat_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lsx_vsat_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_bu' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vsat_hu(v8u16 _1, int var) { ++ v8u16 res = __lsx_vsat_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vsat_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vsat_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_hu' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vsat_wu(v4u32 _1, int var) { ++ v4u32 res = __lsx_vsat_wu(_1, -1); // expected-error 
{{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vsat_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vsat_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_wu' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vsat_du(v2u64 _1, int var) { ++ v2u64 res = __lsx_vsat_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vsat_du(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vsat_du(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vreplvei_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vreplvei_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vreplvei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vreplvei_b(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vreplvei_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vreplvei_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lsx_vreplvei_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lsx_vreplvei_h(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vreplvei_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vreplvei_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __lsx_vreplvei_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __lsx_vreplvei_w(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vreplvei_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vreplvei_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} ++ res |= __lsx_vreplvei_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ res |= __lsx_vreplvei_d(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vandi_b(v16u8 _1, int var) { ++ v16u8 res = __lsx_vandi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lsx_vandi_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lsx_vandi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vandi_b' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vori_b(v16u8 _1, int var) { ++ v16u8 res = __lsx_vori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lsx_vori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lsx_vori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vori_b' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vnori_b(v16u8 _1, int var) { ++ v16u8 res = __lsx_vnori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lsx_vnori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lsx_vnori_b(_1, var); // expected-error {{argument to 
'__builtin_lsx_vnori_b' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vxori_b(v16u8 _1, int var) { ++ v16u8 res = __lsx_vxori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lsx_vxori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lsx_vxori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vxori_b' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vbitseli_b(v16u8 _1, v16u8 _2, int var) { ++ v16u8 res = __lsx_vbitseli_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lsx_vbitseli_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lsx_vbitseli_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vbitseli_b' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vshuf4i_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vshuf4i_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lsx_vshuf4i_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lsx_vshuf4i_b(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vshuf4i_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vshuf4i_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lsx_vshuf4i_h(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lsx_vshuf4i_h(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vshuf4i_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vshuf4i_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lsx_vshuf4i_w(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lsx_vshuf4i_w(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_w' must be a constant integer}} ++ return res; ++} ++ ++int vpickve2gr_b(v16i8 _1, int var) { ++ int res = __lsx_vpickve2gr_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vpickve2gr_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vpickve2gr_b(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_b' must be a constant integer}} ++ return res; ++} ++ ++int vpickve2gr_h(v8i16 _1, int var) { ++ int res = __lsx_vpickve2gr_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lsx_vpickve2gr_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lsx_vpickve2gr_h(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_h' must be a constant integer}} ++ return res; ++} ++ ++int vpickve2gr_w(v4i32 _1, int var) { ++ int res = __lsx_vpickve2gr_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __lsx_vpickve2gr_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __lsx_vpickve2gr_w(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_w' must be a constant integer}} ++ return res; ++} ++ ++long vpickve2gr_d(v2i64 _1, int var) { ++ long res = __lsx_vpickve2gr_d(_1, 
-1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} ++ res |= __lsx_vpickve2gr_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ res |= __lsx_vpickve2gr_d(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_d' must be a constant integer}} ++ return res; ++} ++ ++unsigned int vpickve2gr_bu(v16i8 _1, int var) { ++ unsigned int res = __lsx_vpickve2gr_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vpickve2gr_bu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vpickve2gr_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_bu' must be a constant integer}} ++ return res; ++} ++ ++unsigned int vpickve2gr_hu(v8i16 _1, int var) { ++ unsigned int res = __lsx_vpickve2gr_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lsx_vpickve2gr_hu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lsx_vpickve2gr_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_hu' must be a constant integer}} ++ return res; ++} ++ ++unsigned int vpickve2gr_wu(v4i32 _1, int var) { ++ unsigned int res = __lsx_vpickve2gr_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __lsx_vpickve2gr_wu(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __lsx_vpickve2gr_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_wu' must be a constant integer}} ++ return res; ++} ++ ++unsigned long int vpickve2gr_du(v2i64 _1, int var) { ++ unsigned long int res = __lsx_vpickve2gr_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} ++ res |= __lsx_vpickve2gr_du(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ res |= __lsx_vpickve2gr_du(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vinsgr2vr_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vinsgr2vr_b(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vinsgr2vr_b(_1, 1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vinsgr2vr_b(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vinsgr2vr_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vinsgr2vr_h(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lsx_vinsgr2vr_h(_1, 1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lsx_vinsgr2vr_h(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vinsgr2vr_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vinsgr2vr_w(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __lsx_vinsgr2vr_w(_1, 1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __lsx_vinsgr2vr_w(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vinsgr2vr_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vinsgr2vr_d(_1, 1, -1); // expected-error 
{{argument value 4294967295 is outside the valid range [0, 1]}} ++ res |= __lsx_vinsgr2vr_d(_1, 1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ res |= __lsx_vinsgr2vr_d(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_d' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsllwil_h_b(v16i8 _1, int var) { ++ v8i16 res = __lsx_vsllwil_h_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lsx_vsllwil_h_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lsx_vsllwil_h_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_h_b' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsllwil_w_h(v8i16 _1, int var) { ++ v4i32 res = __lsx_vsllwil_w_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vsllwil_w_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vsllwil_w_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_w_h' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsllwil_d_w(v4i32 _1, int var) { ++ v2i64 res = __lsx_vsllwil_d_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vsllwil_d_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vsllwil_d_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_d_w' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vsllwil_hu_bu(v16u8 _1, int var) { ++ v8u16 res = __lsx_vsllwil_hu_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lsx_vsllwil_hu_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lsx_vsllwil_hu_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_hu_bu' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vsllwil_wu_hu(v8u16 _1, int var) { ++ v4u32 res = __lsx_vsllwil_wu_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vsllwil_wu_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vsllwil_wu_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_wu_hu' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vsllwil_du_wu(v4u32 _1, int var) { ++ v2u64 res = __lsx_vsllwil_du_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vsllwil_du_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vsllwil_du_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_du_wu' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vfrstpi_b(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __lsx_vfrstpi_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vfrstpi_b(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vfrstpi_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vfrstpi_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vfrstpi_h(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __lsx_vfrstpi_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= 
__lsx_vfrstpi_h(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vfrstpi_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vfrstpi_h' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vshuf4i_d(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __lsx_vshuf4i_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lsx_vshuf4i_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lsx_vshuf4i_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vbsrl_v(v16i8 _1, int var) { ++ v16i8 res = __lsx_vbsrl_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vbsrl_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vbsrl_v(_1, var); // expected-error {{argument to '__builtin_lsx_vbsrl_v' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vbsll_v(v16i8 _1, int var) { ++ v16i8 res = __lsx_vbsll_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vbsll_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vbsll_v(_1, var); // expected-error {{argument to '__builtin_lsx_vbsll_v' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vextrins_b(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __lsx_vextrins_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lsx_vextrins_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lsx_vextrins_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vextrins_h(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __lsx_vextrins_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lsx_vextrins_h(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lsx_vextrins_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vextrins_w(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __lsx_vextrins_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lsx_vextrins_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lsx_vextrins_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vextrins_d(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __lsx_vextrins_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lsx_vextrins_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lsx_vextrins_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_d' must be a constant integer}} ++ return res; ++} ++ ++void vstelm_b_idx(v16i8 _1, void *_2, int var) { ++ __lsx_vstelm_b(_1, _2, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ __lsx_vstelm_b(_1, _2, 1, 16); // expected-error {{argument value 16 is outside 
the valid range [0, 15]}} ++ __lsx_vstelm_b(_1, _2, 1, var); // expected-error {{argument to '__builtin_lsx_vstelm_b' must be a constant integer}} ++} ++ ++void vstelm_h_idx(v8i16 _1, void *_2, int var) { ++ __lsx_vstelm_h(_1, _2, 2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ __lsx_vstelm_h(_1, _2, 2, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ __lsx_vstelm_h(_1, _2, 2, var); // expected-error {{argument to '__builtin_lsx_vstelm_h' must be a constant integer}} ++} ++ ++void vstelm_w_idx(v4i32 _1, void *_2, int var) { ++ __lsx_vstelm_w(_1, _2, 4, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ __lsx_vstelm_w(_1, _2, 4, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ __lsx_vstelm_w(_1, _2, 4, var); // expected-error {{argument to '__builtin_lsx_vstelm_w' must be a constant integer}} ++} ++ ++void vstelm_d_idx(v2i64 _1, void *_2, int var) { ++ __lsx_vstelm_d(_1, _2, 8, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} ++ __lsx_vstelm_d(_1, _2, 8, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ __lsx_vstelm_d(_1, _2, 8, var); // expected-error {{argument to '__builtin_lsx_vstelm_d' must be a constant integer}} ++} ++ ++void vstelm_b(v16i8 _1, void *_2, int var) { ++ __lsx_vstelm_b(_1, _2, -129, 1); // expected-error {{argument value -129 is outside the valid range [-128, 127]}} ++ __lsx_vstelm_b(_1, _2, 128, 1); // expected-error {{argument value 128 is outside the valid range [-128, 127]}} ++ __lsx_vstelm_b(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_b' must be a constant integer}} ++} ++ ++void vstelm_h(v8i16 _1, void *_2, int var) { ++ __lsx_vstelm_h(_1, _2, -258, 1); // expected-error {{argument value -258 is outside the valid range [-256, 254]}} ++ __lsx_vstelm_h(_1, _2, 256, 1); // expected-error {{argument value 256 is outside the valid range [-256, 254]}} ++ __lsx_vstelm_h(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_h' must be a constant integer}} ++} ++ ++void vstelm_w(v4i32 _1, void *_2, int var) { ++ __lsx_vstelm_w(_1, _2, -516, 1); // expected-error {{argument value -516 is outside the valid range [-512, 508]}} ++ __lsx_vstelm_w(_1, _2, 512, 1); // expected-error {{argument value 512 is outside the valid range [-512, 508]}} ++ __lsx_vstelm_w(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_w' must be a constant integer}} ++} ++ ++void vstelm_d(v2i64 _1, void *_2, int var) { ++ __lsx_vstelm_d(_1, _2, -1032, 1); // expected-error {{argument value -1032 is outside the valid range [-1024, 1016]}} ++ __lsx_vstelm_d(_1, _2, 1024, 1); // expected-error {{argument value 1024 is outside the valid range [-1024, 1016]}} ++ __lsx_vstelm_d(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_d' must be a constant integer}} ++} ++ ++v16i8 vldrepl_b(void *_1, int var) { ++ v16i8 res = __lsx_vldrepl_b(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ res |= __lsx_vldrepl_b(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} ++ res |= __lsx_vldrepl_b(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vldrepl_h(void *_1, int var) { ++ v8i16 res = __lsx_vldrepl_h(_1, -2050); // expected-error {{argument value -2050 is 
outside the valid range [-2048, 2046]}} ++ res |= __lsx_vldrepl_h(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2046]}} ++ res |= __lsx_vldrepl_h(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vldrepl_w(void *_1, int var) { ++ v4i32 res = __lsx_vldrepl_w(_1, -2052); // expected-error {{argument value -2052 is outside the valid range [-2048, 2044]}} ++ res |= __lsx_vldrepl_w(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2044]}} ++ res |= __lsx_vldrepl_w(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vldrepl_d(void *_1, int var) { ++ v2i64 res = __lsx_vldrepl_d(_1, -2056); // expected-error {{argument value -2056 is outside the valid range [-2048, 2040]}} ++ res |= __lsx_vldrepl_d(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2040]}} ++ res |= __lsx_vldrepl_d(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vrotri_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vrotri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lsx_vrotri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lsx_vrotri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vrotri_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vrotri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vrotri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vrotri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vrotri_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vrotri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vrotri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vrotri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vrotri_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vrotri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vrotri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vrotri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __lsx_vsrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vsrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vsrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __lsx_vsrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vsrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res 
|= __lsx_vsrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __lsx_vsrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vsrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vsrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __lsx_vsrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lsx_vsrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lsx_vsrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __lsx_vsrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vsrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vsrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __lsx_vsrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vsrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vsrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __lsx_vsrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vsrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vsrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __lsx_vsrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lsx_vsrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lsx_vsrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __lsx_vssrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vssrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vssrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __lsx_vssrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vssrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside 
the valid range [0, 31]}} ++ res |= __lsx_vssrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __lsx_vssrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vssrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vssrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __lsx_vssrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lsx_vssrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lsx_vssrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2, int var) { ++ v16u8 res = __lsx_vssrlni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vssrlni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vssrlni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2, int var) { ++ v8u16 res = __lsx_vssrlni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vssrlni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vssrlni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2, int var) { ++ v4u32 res = __lsx_vssrlni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vssrlni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vssrlni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2, int var) { ++ v2u64 res = __lsx_vssrlni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lsx_vssrlni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lsx_vssrlni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_du_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __lsx_vssrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vssrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vssrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __lsx_vssrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= 
__lsx_vssrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vssrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __lsx_vssrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vssrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vssrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __lsx_vssrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lsx_vssrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lsx_vssrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2, int var) { ++ v16u8 res = __lsx_vssrlrni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vssrlrni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vssrlrni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2, int var) { ++ v8u16 res = __lsx_vssrlrni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vssrlrni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vssrlrni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2, int var) { ++ v4u32 res = __lsx_vssrlrni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vssrlrni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vssrlrni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2, int var) { ++ v2u64 res = __lsx_vssrlrni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lsx_vssrlrni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lsx_vssrlrni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_du_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrani_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __lsx_vsrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vsrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vsrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrani_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __lsx_vsrani_h_w(_1, _2, -1); 
// expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vsrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vsrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrani_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __lsx_vsrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vsrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vsrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrani_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __lsx_vsrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lsx_vsrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lsx_vsrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __lsx_vsrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vsrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vsrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __lsx_vsrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vsrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vsrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __lsx_vsrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vsrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vsrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __lsx_vsrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lsx_vsrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lsx_vsrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vssrani_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __lsx_vssrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vssrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vssrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vssrani_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res 
= __lsx_vssrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vssrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vssrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vssrani_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __lsx_vssrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vssrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vssrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vssrani_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __lsx_vssrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lsx_vssrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lsx_vssrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2, int var) { ++ v16u8 res = __lsx_vssrani_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vssrani_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vssrani_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2, int var) { ++ v8u16 res = __lsx_vssrani_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vssrani_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vssrani_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2, int var) { ++ v4u32 res = __lsx_vssrani_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vssrani_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vssrani_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vssrani_du_q(v2u64 _1, v2i64 _2, int var) { ++ v2u64 res = __lsx_vssrani_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lsx_vssrani_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lsx_vssrani_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_du_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __lsx_vssrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vssrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vssrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_b_h' must be a constant integer}} ++ return res; 
++} ++ ++v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __lsx_vssrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vssrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vssrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __lsx_vssrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vssrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vssrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __lsx_vssrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lsx_vssrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lsx_vssrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2, int var) { ++ v16u8 res = __lsx_vssrarni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vssrarni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vssrarni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2, int var) { ++ v8u16 res = __lsx_vssrarni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vssrarni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vssrarni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2, int var) { ++ v4u32 res = __lsx_vssrarni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vssrarni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vssrarni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2, int var) { ++ v2u64 res = __lsx_vssrarni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lsx_vssrarni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lsx_vssrarni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_du_q' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vpermi_w(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __lsx_vpermi_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lsx_vpermi_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lsx_vpermi_w(_1, _2, var); // 
expected-error {{argument to '__builtin_lsx_vpermi_w' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vld(void *_1, int var) { ++ v16i8 res = __lsx_vld(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ res |= __lsx_vld(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} ++ res |= __lsx_vld(_1, var); // expected-error {{argument to '__builtin_lsx_vld' must be a constant integer}} ++ return res; ++} ++ ++void vst(v16i8 _1, void *_2, int var) { ++ __lsx_vst(_1, _2, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ __lsx_vst(_1, _2, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} ++ __lsx_vst(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vst' must be a constant integer}} ++} ++ ++v2i64 vldi(int var) { ++ v2i64 res = __lsx_vldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}} ++ res |= __lsx_vldi(4096); // expected-error {{argument value 4096 is outside the valid range [-4096, 4095]}} ++ res |= __lsx_vldi(var); // expected-error {{argument to '__builtin_lsx_vldi' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vrepli_b(int var) { ++ v16i8 res = __lsx_vrepli_b(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __lsx_vrepli_b(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __lsx_vrepli_b(var); // expected-error {{argument to '__builtin_lsx_vrepli_b' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vrepli_d(int var) { ++ v2i64 res = __lsx_vrepli_d(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __lsx_vrepli_d(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __lsx_vrepli_d(var); // expected-error {{argument to '__builtin_lsx_vrepli_d' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vrepli_h(int var) { ++ v8i16 res = __lsx_vrepli_h(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __lsx_vrepli_h(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __lsx_vrepli_h(var); // expected-error {{argument to '__builtin_lsx_vrepli_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vrepli_w(int var) { ++ v4i32 res = __lsx_vrepli_w(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __lsx_vrepli_w(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __lsx_vrepli_w(var); // expected-error {{argument to '__builtin_lsx_vrepli_w' must be a constant integer}} ++ return res; ++} +diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c b/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c +new file mode 100644 +index 000000000000..331e29fb7d17 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c +@@ -0,0 +1,4451 @@ ++// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -O2 -emit-llvm %s -o - | FileCheck %s ++ ++#include <lsxintrin.h> ++ ++// CHECK-LABEL: @vsll_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsll_b(v16i8 _1,
v16i8 _2) { return __lsx_vsll_b(_1, _2); } ++// CHECK-LABEL: @vsll_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsll_h(v8i16 _1, v8i16 _2) { return __lsx_vsll_h(_1, _2); } ++// CHECK-LABEL: @vsll_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsll_w(v4i32 _1, v4i32 _2) { return __lsx_vsll_w(_1, _2); } ++// CHECK-LABEL: @vsll_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsll_d(v2i64 _1, v2i64 _2) { return __lsx_vsll_d(_1, _2); } ++// CHECK-LABEL: @vslli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vslli_b(v16i8 _1) { return __lsx_vslli_b(_1, 1); } ++// CHECK-LABEL: @vslli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vslli_h(v8i16 _1) { return __lsx_vslli_h(_1, 1); } ++// CHECK-LABEL: @vslli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vslli_w(v4i32 _1) { return __lsx_vslli_w(_1, 1); } ++// CHECK-LABEL: @vslli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vslli_d(v2i64 _1) { return __lsx_vslli_d(_1, 1); } ++// CHECK-LABEL: @vsra_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsra_b(v16i8 _1, v16i8 _2) { return __lsx_vsra_b(_1, _2); } ++// CHECK-LABEL: @vsra_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsra_h(v8i16 _1, v8i16 _2) { return __lsx_vsra_h(_1, _2); } ++// CHECK-LABEL: @vsra_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsra_w(v4i32 _1, v4i32 _2) { return __lsx_vsra_w(_1, _2); } ++// CHECK-LABEL: @vsra_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsra_d(v2i64 _1, v2i64 _2) { return __lsx_vsra_d(_1, _2); } ++// CHECK-LABEL: @vsrai_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrai_b(v16i8 _1) { return __lsx_vsrai_b(_1, 1); } ++// CHECK-LABEL: @vsrai_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: 
ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrai_h(v8i16 _1) { return __lsx_vsrai_h(_1, 1); } ++// CHECK-LABEL: @vsrai_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrai_w(v4i32 _1) { return __lsx_vsrai_w(_1, 1); } ++// CHECK-LABEL: @vsrai_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrai_d(v2i64 _1) { return __lsx_vsrai_d(_1, 1); } ++// CHECK-LABEL: @vsrar_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrar_b(v16i8 _1, v16i8 _2) { return __lsx_vsrar_b(_1, _2); } ++// CHECK-LABEL: @vsrar_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrar_h(v8i16 _1, v8i16 _2) { return __lsx_vsrar_h(_1, _2); } ++// CHECK-LABEL: @vsrar_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrar_w(v4i32 _1, v4i32 _2) { return __lsx_vsrar_w(_1, _2); } ++// CHECK-LABEL: @vsrar_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrar_d(v2i64 _1, v2i64 _2) { return __lsx_vsrar_d(_1, _2); } ++// CHECK-LABEL: @vsrari_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrari_b(v16i8 _1) { return __lsx_vsrari_b(_1, 1); } ++// CHECK-LABEL: @vsrari_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrari_h(v8i16 _1) { return __lsx_vsrari_h(_1, 1); } ++// CHECK-LABEL: @vsrari_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrari_w(v4i32 _1) { return __lsx_vsrari_w(_1, 1); } ++// CHECK-LABEL: @vsrari_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrari_d(v2i64 _1) { return __lsx_vsrari_d(_1, 1); } ++// CHECK-LABEL: @vsrl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrl_b(v16i8 _1, v16i8 _2) { return __lsx_vsrl_b(_1, _2); } ++// CHECK-LABEL: @vsrl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrl_h(v8i16 _1, v8i16 _2) { return __lsx_vsrl_h(_1, _2); } ++// CHECK-LABEL: @vsrl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> 
@llvm.loongarch.lsx.vsrl.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrl_w(v4i32 _1, v4i32 _2) { return __lsx_vsrl_w(_1, _2); } ++// CHECK-LABEL: @vsrl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrl_d(v2i64 _1, v2i64 _2) { return __lsx_vsrl_d(_1, _2); } ++// CHECK-LABEL: @vsrli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrli_b(v16i8 _1) { return __lsx_vsrli_b(_1, 1); } ++// CHECK-LABEL: @vsrli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrli_h(v8i16 _1) { return __lsx_vsrli_h(_1, 1); } ++// CHECK-LABEL: @vsrli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrli_w(v4i32 _1) { return __lsx_vsrli_w(_1, 1); } ++// CHECK-LABEL: @vsrli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrli_d(v2i64 _1) { return __lsx_vsrli_d(_1, 1); } ++// CHECK-LABEL: @vsrlr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrlr_b(v16i8 _1, v16i8 _2) { return __lsx_vsrlr_b(_1, _2); } ++// CHECK-LABEL: @vsrlr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrlr_h(v8i16 _1, v8i16 _2) { return __lsx_vsrlr_h(_1, _2); } ++// CHECK-LABEL: @vsrlr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrlr_w(v4i32 _1, v4i32 _2) { return __lsx_vsrlr_w(_1, _2); } ++// CHECK-LABEL: @vsrlr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrlr_d(v2i64 _1, v2i64 _2) { return __lsx_vsrlr_d(_1, _2); } ++// CHECK-LABEL: @vsrlri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrlri_b(v16i8 _1) { return __lsx_vsrlri_b(_1, 1); } ++// CHECK-LABEL: @vsrlri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrlri_h(v8i16 _1) { return __lsx_vsrlri_h(_1, 1); } ++// CHECK-LABEL: @vsrlri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrlri_w(v4i32 _1) { return __lsx_vsrlri_w(_1, 1); } ++// CHECK-LABEL: @vsrlri_d( ++// CHECK-NEXT: entry: 
++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrlri_d(v2i64 _1) { return __lsx_vsrlri_d(_1, 1); } ++// CHECK-LABEL: @vbitclr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vbitclr_b(v16u8 _1, v16u8 _2) { return __lsx_vbitclr_b(_1, _2); } ++// CHECK-LABEL: @vbitclr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vbitclr_h(v8u16 _1, v8u16 _2) { return __lsx_vbitclr_h(_1, _2); } ++// CHECK-LABEL: @vbitclr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vbitclr_w(v4u32 _1, v4u32 _2) { return __lsx_vbitclr_w(_1, _2); } ++// CHECK-LABEL: @vbitclr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vbitclr_d(v2u64 _1, v2u64 _2) { return __lsx_vbitclr_d(_1, _2); } ++// CHECK-LABEL: @vbitclri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vbitclri_b(v16u8 _1) { return __lsx_vbitclri_b(_1, 1); } ++// CHECK-LABEL: @vbitclri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vbitclri_h(v8u16 _1) { return __lsx_vbitclri_h(_1, 1); } ++// CHECK-LABEL: @vbitclri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vbitclri_w(v4u32 _1) { return __lsx_vbitclri_w(_1, 1); } ++// CHECK-LABEL: @vbitclri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vbitclri_d(v2u64 _1) { return __lsx_vbitclri_d(_1, 1); } ++// CHECK-LABEL: @vbitset_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vbitset_b(v16u8 _1, v16u8 _2) { return __lsx_vbitset_b(_1, _2); } ++// CHECK-LABEL: @vbitset_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vbitset_h(v8u16 _1, v8u16 _2) { return __lsx_vbitset_h(_1, _2); } ++// CHECK-LABEL: @vbitset_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vbitset_w(v4u32 _1, v4u32 _2) { return __lsx_vbitset_w(_1, _2); } ++// CHECK-LABEL: @vbitset_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> 
@llvm.loongarch.lsx.vbitset.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vbitset_d(v2u64 _1, v2u64 _2) { return __lsx_vbitset_d(_1, _2); } ++// CHECK-LABEL: @vbitseti_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vbitseti_b(v16u8 _1) { return __lsx_vbitseti_b(_1, 1); } ++// CHECK-LABEL: @vbitseti_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vbitseti_h(v8u16 _1) { return __lsx_vbitseti_h(_1, 1); } ++// CHECK-LABEL: @vbitseti_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vbitseti_w(v4u32 _1) { return __lsx_vbitseti_w(_1, 1); } ++// CHECK-LABEL: @vbitseti_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vbitseti_d(v2u64 _1) { return __lsx_vbitseti_d(_1, 1); } ++// CHECK-LABEL: @vbitrev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vbitrev_b(v16u8 _1, v16u8 _2) { return __lsx_vbitrev_b(_1, _2); } ++// CHECK-LABEL: @vbitrev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vbitrev_h(v8u16 _1, v8u16 _2) { return __lsx_vbitrev_h(_1, _2); } ++// CHECK-LABEL: @vbitrev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vbitrev_w(v4u32 _1, v4u32 _2) { return __lsx_vbitrev_w(_1, _2); } ++// CHECK-LABEL: @vbitrev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vbitrev_d(v2u64 _1, v2u64 _2) { return __lsx_vbitrev_d(_1, _2); } ++// CHECK-LABEL: @vbitrevi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vbitrevi_b(v16u8 _1) { return __lsx_vbitrevi_b(_1, 1); } ++// CHECK-LABEL: @vbitrevi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vbitrevi_h(v8u16 _1) { return __lsx_vbitrevi_h(_1, 1); } ++// CHECK-LABEL: @vbitrevi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vbitrevi_w(v4u32 _1) { return __lsx_vbitrevi_w(_1, 1); } ++// CHECK-LABEL: @vbitrevi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 
vbitrevi_d(v2u64 _1) { return __lsx_vbitrevi_d(_1, 1); } ++// CHECK-LABEL: @vadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vadd_b(v16i8 _1, v16i8 _2) { return __lsx_vadd_b(_1, _2); } ++// CHECK-LABEL: @vadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vadd_h(v8i16 _1, v8i16 _2) { return __lsx_vadd_h(_1, _2); } ++// CHECK-LABEL: @vadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vadd_w(v4i32 _1, v4i32 _2) { return __lsx_vadd_w(_1, _2); } ++// CHECK-LABEL: @vadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vadd_d(v2i64 _1, v2i64 _2) { return __lsx_vadd_d(_1, _2); } ++// CHECK-LABEL: @vaddi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vaddi_bu(v16i8 _1) { return __lsx_vaddi_bu(_1, 1); } ++// CHECK-LABEL: @vaddi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vaddi_hu(v8i16 _1) { return __lsx_vaddi_hu(_1, 1); } ++// CHECK-LABEL: @vaddi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vaddi_wu(v4i32 _1) { return __lsx_vaddi_wu(_1, 1); } ++// CHECK-LABEL: @vaddi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddi_du(v2i64 _1) { return __lsx_vaddi_du(_1, 1); } ++// CHECK-LABEL: @vsub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsub_b(v16i8 _1, v16i8 _2) { return __lsx_vsub_b(_1, _2); } ++// CHECK-LABEL: @vsub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsub_h(v8i16 _1, v8i16 _2) { return __lsx_vsub_h(_1, _2); } ++// CHECK-LABEL: @vsub_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsub_w(v4i32 _1, v4i32 _2) { return __lsx_vsub_w(_1, _2); } ++// CHECK-LABEL: @vsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsub_d(v2i64 _1, v2i64 _2) { return __lsx_vsub_d(_1, _2); } ++// CHECK-LABEL: @vsubi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> 
@llvm.loongarch.lsx.vsubi.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsubi_bu(v16i8 _1) { return __lsx_vsubi_bu(_1, 1); } ++// CHECK-LABEL: @vsubi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsubi_hu(v8i16 _1) { return __lsx_vsubi_hu(_1, 1); } ++// CHECK-LABEL: @vsubi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsubi_wu(v4i32 _1) { return __lsx_vsubi_wu(_1, 1); } ++// CHECK-LABEL: @vsubi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubi_du(v2i64 _1) { return __lsx_vsubi_du(_1, 1); } ++// CHECK-LABEL: @vmax_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmax_b(v16i8 _1, v16i8 _2) { return __lsx_vmax_b(_1, _2); } ++// CHECK-LABEL: @vmax_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmax_h(v8i16 _1, v8i16 _2) { return __lsx_vmax_h(_1, _2); } ++// CHECK-LABEL: @vmax_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmax_w(v4i32 _1, v4i32 _2) { return __lsx_vmax_w(_1, _2); } ++// CHECK-LABEL: @vmax_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmax_d(v2i64 _1, v2i64 _2) { return __lsx_vmax_d(_1, _2); } ++// CHECK-LABEL: @vmaxi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmaxi_b(v16i8 _1) { return __lsx_vmaxi_b(_1, 1); } ++// CHECK-LABEL: @vmaxi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmaxi_h(v8i16 _1) { return __lsx_vmaxi_h(_1, 1); } ++// CHECK-LABEL: @vmaxi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmaxi_w(v4i32 _1) { return __lsx_vmaxi_w(_1, 1); } ++// CHECK-LABEL: @vmaxi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaxi_d(v2i64 _1) { return __lsx_vmaxi_d(_1, 1); } ++// CHECK-LABEL: @vmax_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vmax_bu(v16u8 _1, v16u8 _2) { return __lsx_vmax_bu(_1, _2); } ++// CHECK-LABEL: @vmax_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = 
tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vmax_hu(v8u16 _1, v8u16 _2) { return __lsx_vmax_hu(_1, _2); } ++// CHECK-LABEL: @vmax_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vmax_wu(v4u32 _1, v4u32 _2) { return __lsx_vmax_wu(_1, _2); } ++// CHECK-LABEL: @vmax_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmax_du(v2u64 _1, v2u64 _2) { return __lsx_vmax_du(_1, _2); } ++// CHECK-LABEL: @vmaxi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vmaxi_bu(v16u8 _1) { return __lsx_vmaxi_bu(_1, 1); } ++// CHECK-LABEL: @vmaxi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vmaxi_hu(v8u16 _1) { return __lsx_vmaxi_hu(_1, 1); } ++// CHECK-LABEL: @vmaxi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vmaxi_wu(v4u32 _1) { return __lsx_vmaxi_wu(_1, 1); } ++// CHECK-LABEL: @vmaxi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmaxi_du(v2u64 _1) { return __lsx_vmaxi_du(_1, 1); } ++// CHECK-LABEL: @vmin_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmin_b(v16i8 _1, v16i8 _2) { return __lsx_vmin_b(_1, _2); } ++// CHECK-LABEL: @vmin_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmin_h(v8i16 _1, v8i16 _2) { return __lsx_vmin_h(_1, _2); } ++// CHECK-LABEL: @vmin_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmin_w(v4i32 _1, v4i32 _2) { return __lsx_vmin_w(_1, _2); } ++// CHECK-LABEL: @vmin_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmin_d(v2i64 _1, v2i64 _2) { return __lsx_vmin_d(_1, _2); } ++// CHECK-LABEL: @vmini_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmini_b(v16i8 _1) { return __lsx_vmini_b(_1, 1); } ++// CHECK-LABEL: @vmini_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmini_h(v8i16 _1) { return __lsx_vmini_h(_1, 1); } ++// 
CHECK-LABEL: @vmini_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmini_w(v4i32 _1) { return __lsx_vmini_w(_1, 1); } ++// CHECK-LABEL: @vmini_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmini_d(v2i64 _1) { return __lsx_vmini_d(_1, 1); } ++// CHECK-LABEL: @vmin_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vmin_bu(v16u8 _1, v16u8 _2) { return __lsx_vmin_bu(_1, _2); } ++// CHECK-LABEL: @vmin_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vmin_hu(v8u16 _1, v8u16 _2) { return __lsx_vmin_hu(_1, _2); } ++// CHECK-LABEL: @vmin_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vmin_wu(v4u32 _1, v4u32 _2) { return __lsx_vmin_wu(_1, _2); } ++// CHECK-LABEL: @vmin_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmin_du(v2u64 _1, v2u64 _2) { return __lsx_vmin_du(_1, _2); } ++// CHECK-LABEL: @vmini_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vmini_bu(v16u8 _1) { return __lsx_vmini_bu(_1, 1); } ++// CHECK-LABEL: @vmini_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vmini_hu(v8u16 _1) { return __lsx_vmini_hu(_1, 1); } ++// CHECK-LABEL: @vmini_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vmini_wu(v4u32 _1) { return __lsx_vmini_wu(_1, 1); } ++// CHECK-LABEL: @vmini_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmini_du(v2u64 _1) { return __lsx_vmini_du(_1, 1); } ++// CHECK-LABEL: @vseq_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vseq_b(v16i8 _1, v16i8 _2) { return __lsx_vseq_b(_1, _2); } ++// CHECK-LABEL: @vseq_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vseq_h(v8i16 _1, v8i16 _2) { return __lsx_vseq_h(_1, _2); } ++// CHECK-LABEL: @vseq_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] 
++// ++v4i32 vseq_w(v4i32 _1, v4i32 _2) { return __lsx_vseq_w(_1, _2); } ++// CHECK-LABEL: @vseq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vseq_d(v2i64 _1, v2i64 _2) { return __lsx_vseq_d(_1, _2); } ++// CHECK-LABEL: @vseqi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vseqi_b(v16i8 _1) { return __lsx_vseqi_b(_1, 1); } ++// CHECK-LABEL: @vseqi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vseqi_h(v8i16 _1) { return __lsx_vseqi_h(_1, 1); } ++// CHECK-LABEL: @vseqi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vseqi_w(v4i32 _1) { return __lsx_vseqi_w(_1, 1); } ++// CHECK-LABEL: @vseqi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vseqi_d(v2i64 _1) { return __lsx_vseqi_d(_1, 1); } ++// CHECK-LABEL: @vslti_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vslti_b(v16i8 _1) { return __lsx_vslti_b(_1, 1); } ++// CHECK-LABEL: @vslt_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vslt_b(v16i8 _1, v16i8 _2) { return __lsx_vslt_b(_1, _2); } ++// CHECK-LABEL: @vslt_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vslt_h(v8i16 _1, v8i16 _2) { return __lsx_vslt_h(_1, _2); } ++// CHECK-LABEL: @vslt_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vslt_w(v4i32 _1, v4i32 _2) { return __lsx_vslt_w(_1, _2); } ++// CHECK-LABEL: @vslt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vslt_d(v2i64 _1, v2i64 _2) { return __lsx_vslt_d(_1, _2); } ++// CHECK-LABEL: @vslti_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vslti_h(v8i16 _1) { return __lsx_vslti_h(_1, 1); } ++// CHECK-LABEL: @vslti_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vslti_w(v4i32 _1) { return __lsx_vslti_w(_1, 1); } ++// CHECK-LABEL: @vslti_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> 
[[TMP0]] ++// ++v2i64 vslti_d(v2i64 _1) { return __lsx_vslti_d(_1, 1); } ++// CHECK-LABEL: @vslt_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vslt_bu(v16u8 _1, v16u8 _2) { return __lsx_vslt_bu(_1, _2); } ++// CHECK-LABEL: @vslt_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vslt_hu(v8u16 _1, v8u16 _2) { return __lsx_vslt_hu(_1, _2); } ++// CHECK-LABEL: @vslt_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vslt_wu(v4u32 _1, v4u32 _2) { return __lsx_vslt_wu(_1, _2); } ++// CHECK-LABEL: @vslt_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vslt_du(v2u64 _1, v2u64 _2) { return __lsx_vslt_du(_1, _2); } ++// CHECK-LABEL: @vslti_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vslti_bu(v16u8 _1) { return __lsx_vslti_bu(_1, 1); } ++// CHECK-LABEL: @vslti_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vslti_hu(v8u16 _1) { return __lsx_vslti_hu(_1, 1); } ++// CHECK-LABEL: @vslti_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vslti_wu(v4u32 _1) { return __lsx_vslti_wu(_1, 1); } ++// CHECK-LABEL: @vslti_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vslti_du(v2u64 _1) { return __lsx_vslti_du(_1, 1); } ++// CHECK-LABEL: @vsle_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsle_b(v16i8 _1, v16i8 _2) { return __lsx_vsle_b(_1, _2); } ++// CHECK-LABEL: @vsle_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsle_h(v8i16 _1, v8i16 _2) { return __lsx_vsle_h(_1, _2); } ++// CHECK-LABEL: @vsle_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsle_w(v4i32 _1, v4i32 _2) { return __lsx_vsle_w(_1, _2); } ++// CHECK-LABEL: @vsle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsle_d(v2i64 _1, v2i64 _2) { return __lsx_vsle_d(_1, _2); } ++// CHECK-LABEL: @vslei_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call 
<16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vslei_b(v16i8 _1) { return __lsx_vslei_b(_1, 1); } ++// CHECK-LABEL: @vslei_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vslei_h(v8i16 _1) { return __lsx_vslei_h(_1, 1); } ++// CHECK-LABEL: @vslei_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vslei_w(v4i32 _1) { return __lsx_vslei_w(_1, 1); } ++// CHECK-LABEL: @vslei_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vslei_d(v2i64 _1) { return __lsx_vslei_d(_1, 1); } ++// CHECK-LABEL: @vsle_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsle_bu(v16u8 _1, v16u8 _2) { return __lsx_vsle_bu(_1, _2); } ++// CHECK-LABEL: @vsle_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsle_hu(v8u16 _1, v8u16 _2) { return __lsx_vsle_hu(_1, _2); } ++// CHECK-LABEL: @vsle_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsle_wu(v4u32 _1, v4u32 _2) { return __lsx_vsle_wu(_1, _2); } ++// CHECK-LABEL: @vsle_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsle_du(v2u64 _1, v2u64 _2) { return __lsx_vsle_du(_1, _2); } ++// CHECK-LABEL: @vslei_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vslei_bu(v16u8 _1) { return __lsx_vslei_bu(_1, 1); } ++// CHECK-LABEL: @vslei_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vslei_hu(v8u16 _1) { return __lsx_vslei_hu(_1, 1); } ++// CHECK-LABEL: @vslei_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vslei_wu(v4u32 _1) { return __lsx_vslei_wu(_1, 1); } ++// CHECK-LABEL: @vslei_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vslei_du(v2u64 _1) { return __lsx_vslei_du(_1, 1); } ++// CHECK-LABEL: @vsat_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsat_b(v16i8 _1) { return __lsx_vsat_b(_1, 1); } ++// CHECK-LABEL: @vsat_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail 
call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsat_h(v8i16 _1) { return __lsx_vsat_h(_1, 1); } ++// CHECK-LABEL: @vsat_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsat_w(v4i32 _1) { return __lsx_vsat_w(_1, 1); } ++// CHECK-LABEL: @vsat_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsat_d(v2i64 _1) { return __lsx_vsat_d(_1, 1); } ++// CHECK-LABEL: @vsat_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vsat_bu(v16u8 _1) { return __lsx_vsat_bu(_1, 1); } ++// CHECK-LABEL: @vsat_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vsat_hu(v8u16 _1) { return __lsx_vsat_hu(_1, 1); } ++// CHECK-LABEL: @vsat_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vsat_wu(v4u32 _1) { return __lsx_vsat_wu(_1, 1); } ++// CHECK-LABEL: @vsat_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vsat_du(v2u64 _1) { return __lsx_vsat_du(_1, 1); } ++// CHECK-LABEL: @vadda_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vadda_b(v16i8 _1, v16i8 _2) { return __lsx_vadda_b(_1, _2); } ++// CHECK-LABEL: @vadda_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vadda_h(v8i16 _1, v8i16 _2) { return __lsx_vadda_h(_1, _2); } ++// CHECK-LABEL: @vadda_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vadda_w(v4i32 _1, v4i32 _2) { return __lsx_vadda_w(_1, _2); } ++// CHECK-LABEL: @vadda_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vadda_d(v2i64 _1, v2i64 _2) { return __lsx_vadda_d(_1, _2); } ++// CHECK-LABEL: @vsadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsadd_b(v16i8 _1, v16i8 _2) { return __lsx_vsadd_b(_1, _2); } ++// CHECK-LABEL: @vsadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsadd_h(v8i16 _1, v8i16 _2) { return __lsx_vsadd_h(_1, _2); } ++// CHECK-LABEL: @vsadd_w( ++// CHECK-NEXT: entry: 
++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsadd_w(v4i32 _1, v4i32 _2) { return __lsx_vsadd_w(_1, _2); } ++// CHECK-LABEL: @vsadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsadd_d(v2i64 _1, v2i64 _2) { return __lsx_vsadd_d(_1, _2); } ++// CHECK-LABEL: @vsadd_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vsadd_bu(v16u8 _1, v16u8 _2) { return __lsx_vsadd_bu(_1, _2); } ++// CHECK-LABEL: @vsadd_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vsadd_hu(v8u16 _1, v8u16 _2) { return __lsx_vsadd_hu(_1, _2); } ++// CHECK-LABEL: @vsadd_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vsadd_wu(v4u32 _1, v4u32 _2) { return __lsx_vsadd_wu(_1, _2); } ++// CHECK-LABEL: @vsadd_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vsadd_du(v2u64 _1, v2u64 _2) { return __lsx_vsadd_du(_1, _2); } ++// CHECK-LABEL: @vavg_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vavg_b(v16i8 _1, v16i8 _2) { return __lsx_vavg_b(_1, _2); } ++// CHECK-LABEL: @vavg_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vavg_h(v8i16 _1, v8i16 _2) { return __lsx_vavg_h(_1, _2); } ++// CHECK-LABEL: @vavg_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vavg_w(v4i32 _1, v4i32 _2) { return __lsx_vavg_w(_1, _2); } ++// CHECK-LABEL: @vavg_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vavg_d(v2i64 _1, v2i64 _2) { return __lsx_vavg_d(_1, _2); } ++// CHECK-LABEL: @vavg_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vavg_bu(v16u8 _1, v16u8 _2) { return __lsx_vavg_bu(_1, _2); } ++// CHECK-LABEL: @vavg_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vavg_hu(v8u16 _1, v8u16 _2) { return __lsx_vavg_hu(_1, _2); } ++// CHECK-LABEL: @vavg_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> 
@llvm.loongarch.lsx.vavg.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vavg_wu(v4u32 _1, v4u32 _2) { return __lsx_vavg_wu(_1, _2); } ++// CHECK-LABEL: @vavg_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vavg_du(v2u64 _1, v2u64 _2) { return __lsx_vavg_du(_1, _2); } ++// CHECK-LABEL: @vavgr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vavgr_b(v16i8 _1, v16i8 _2) { return __lsx_vavgr_b(_1, _2); } ++// CHECK-LABEL: @vavgr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vavgr_h(v8i16 _1, v8i16 _2) { return __lsx_vavgr_h(_1, _2); } ++// CHECK-LABEL: @vavgr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vavgr_w(v4i32 _1, v4i32 _2) { return __lsx_vavgr_w(_1, _2); } ++// CHECK-LABEL: @vavgr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vavgr_d(v2i64 _1, v2i64 _2) { return __lsx_vavgr_d(_1, _2); } ++// CHECK-LABEL: @vavgr_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vavgr_bu(v16u8 _1, v16u8 _2) { return __lsx_vavgr_bu(_1, _2); } ++// CHECK-LABEL: @vavgr_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vavgr_hu(v8u16 _1, v8u16 _2) { return __lsx_vavgr_hu(_1, _2); } ++// CHECK-LABEL: @vavgr_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vavgr_wu(v4u32 _1, v4u32 _2) { return __lsx_vavgr_wu(_1, _2); } ++// CHECK-LABEL: @vavgr_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vavgr_du(v2u64 _1, v2u64 _2) { return __lsx_vavgr_du(_1, _2); } ++// CHECK-LABEL: @vssub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssub_b(v16i8 _1, v16i8 _2) { return __lsx_vssub_b(_1, _2); } ++// CHECK-LABEL: @vssub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vssub_h(v8i16 _1, v8i16 _2) { return __lsx_vssub_h(_1, _2); } ++// CHECK-LABEL: @vssub_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> 
[[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssub_w(v4i32 _1, v4i32 _2) { return __lsx_vssub_w(_1, _2); } ++// CHECK-LABEL: @vssub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vssub_d(v2i64 _1, v2i64 _2) { return __lsx_vssub_d(_1, _2); } ++// CHECK-LABEL: @vssub_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssub_bu(v16u8 _1, v16u8 _2) { return __lsx_vssub_bu(_1, _2); } ++// CHECK-LABEL: @vssub_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssub_hu(v8u16 _1, v8u16 _2) { return __lsx_vssub_hu(_1, _2); } ++// CHECK-LABEL: @vssub_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssub_wu(v4u32 _1, v4u32 _2) { return __lsx_vssub_wu(_1, _2); } ++// CHECK-LABEL: @vssub_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vssub_du(v2u64 _1, v2u64 _2) { return __lsx_vssub_du(_1, _2); } ++// CHECK-LABEL: @vabsd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vabsd_b(v16i8 _1, v16i8 _2) { return __lsx_vabsd_b(_1, _2); } ++// CHECK-LABEL: @vabsd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vabsd_h(v8i16 _1, v8i16 _2) { return __lsx_vabsd_h(_1, _2); } ++// CHECK-LABEL: @vabsd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vabsd_w(v4i32 _1, v4i32 _2) { return __lsx_vabsd_w(_1, _2); } ++// CHECK-LABEL: @vabsd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vabsd_d(v2i64 _1, v2i64 _2) { return __lsx_vabsd_d(_1, _2); } ++// CHECK-LABEL: @vabsd_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vabsd_bu(v16u8 _1, v16u8 _2) { return __lsx_vabsd_bu(_1, _2); } ++// CHECK-LABEL: @vabsd_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vabsd_hu(v8u16 _1, v8u16 _2) { return __lsx_vabsd_hu(_1, _2); } ++// CHECK-LABEL: @vabsd_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) 
++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vabsd_wu(v4u32 _1, v4u32 _2) { return __lsx_vabsd_wu(_1, _2); } ++// CHECK-LABEL: @vabsd_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vabsd_du(v2u64 _1, v2u64 _2) { return __lsx_vabsd_du(_1, _2); } ++// CHECK-LABEL: @vmul_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmul_b(v16i8 _1, v16i8 _2) { return __lsx_vmul_b(_1, _2); } ++// CHECK-LABEL: @vmul_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmul_h(v8i16 _1, v8i16 _2) { return __lsx_vmul_h(_1, _2); } ++// CHECK-LABEL: @vmul_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmul_w(v4i32 _1, v4i32 _2) { return __lsx_vmul_w(_1, _2); } ++// CHECK-LABEL: @vmul_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmul_d(v2i64 _1, v2i64 _2) { return __lsx_vmul_d(_1, _2); } ++// CHECK-LABEL: @vmadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmadd_b(v16i8 _1, v16i8 _2, v16i8 _3) { ++ return __lsx_vmadd_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmadd_h(v8i16 _1, v8i16 _2, v8i16 _3) { ++ return __lsx_vmadd_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmadd_w(v4i32 _1, v4i32 _2, v4i32 _3) { ++ return __lsx_vmadd_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmadd_d(v2i64 _1, v2i64 _2, v2i64 _3) { ++ return __lsx_vmadd_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vmsub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmsub_b(v16i8 _1, v16i8 _2, v16i8 _3) { ++ return __lsx_vmsub_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmsub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmsub_h(v8i16 _1, v8i16 _2, v8i16 _3) { ++ return __lsx_vmsub_h(_1, _2, _3); ++} 
++// CHECK-LABEL: @vmsub_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmsub_w(v4i32 _1, v4i32 _2, v4i32 _3) { ++ return __lsx_vmsub_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmsub_d(v2i64 _1, v2i64 _2, v2i64 _3) { ++ return __lsx_vmsub_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vdiv_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vdiv_b(v16i8 _1, v16i8 _2) { return __lsx_vdiv_b(_1, _2); } ++// CHECK-LABEL: @vdiv_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vdiv_h(v8i16 _1, v8i16 _2) { return __lsx_vdiv_h(_1, _2); } ++// CHECK-LABEL: @vdiv_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vdiv_w(v4i32 _1, v4i32 _2) { return __lsx_vdiv_w(_1, _2); } ++// CHECK-LABEL: @vdiv_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vdiv_d(v2i64 _1, v2i64 _2) { return __lsx_vdiv_d(_1, _2); } ++// CHECK-LABEL: @vdiv_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vdiv_bu(v16u8 _1, v16u8 _2) { return __lsx_vdiv_bu(_1, _2); } ++// CHECK-LABEL: @vdiv_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vdiv_hu(v8u16 _1, v8u16 _2) { return __lsx_vdiv_hu(_1, _2); } ++// CHECK-LABEL: @vdiv_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vdiv_wu(v4u32 _1, v4u32 _2) { return __lsx_vdiv_wu(_1, _2); } ++// CHECK-LABEL: @vdiv_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vdiv_du(v2u64 _1, v2u64 _2) { return __lsx_vdiv_du(_1, _2); } ++// CHECK-LABEL: @vhaddw_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vhaddw_h_b(v16i8 _1, v16i8 _2) { return __lsx_vhaddw_h_b(_1, _2); } ++// CHECK-LABEL: @vhaddw_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vhaddw_w_h(v8i16 _1, v8i16 _2) { return 
__lsx_vhaddw_w_h(_1, _2); } ++// CHECK-LABEL: @vhaddw_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vhaddw_d_w(v4i32 _1, v4i32 _2) { return __lsx_vhaddw_d_w(_1, _2); } ++// CHECK-LABEL: @vhaddw_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vhaddw_hu_bu(v16u8 _1, v16u8 _2) { return __lsx_vhaddw_hu_bu(_1, _2); } ++// CHECK-LABEL: @vhaddw_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vhaddw_wu_hu(v8u16 _1, v8u16 _2) { return __lsx_vhaddw_wu_hu(_1, _2); } ++// CHECK-LABEL: @vhaddw_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vhaddw_du_wu(v4u32 _1, v4u32 _2) { return __lsx_vhaddw_du_wu(_1, _2); } ++// CHECK-LABEL: @vhsubw_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vhsubw_h_b(v16i8 _1, v16i8 _2) { return __lsx_vhsubw_h_b(_1, _2); } ++// CHECK-LABEL: @vhsubw_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vhsubw_w_h(v8i16 _1, v8i16 _2) { return __lsx_vhsubw_w_h(_1, _2); } ++// CHECK-LABEL: @vhsubw_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vhsubw_d_w(v4i32 _1, v4i32 _2) { return __lsx_vhsubw_d_w(_1, _2); } ++// CHECK-LABEL: @vhsubw_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vhsubw_hu_bu(v16u8 _1, v16u8 _2) { return __lsx_vhsubw_hu_bu(_1, _2); } ++// CHECK-LABEL: @vhsubw_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vhsubw_wu_hu(v8u16 _1, v8u16 _2) { return __lsx_vhsubw_wu_hu(_1, _2); } ++// CHECK-LABEL: @vhsubw_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vhsubw_du_wu(v4u32 _1, v4u32 _2) { return __lsx_vhsubw_du_wu(_1, _2); } ++// CHECK-LABEL: @vmod_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmod_b(v16i8 _1, v16i8 _2) { return __lsx_vmod_b(_1, _2); } ++// CHECK-LABEL: @vmod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> [[_1:%.*]], <8 x i16> 
[[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmod_h(v8i16 _1, v8i16 _2) { return __lsx_vmod_h(_1, _2); } ++// CHECK-LABEL: @vmod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmod_w(v4i32 _1, v4i32 _2) { return __lsx_vmod_w(_1, _2); } ++// CHECK-LABEL: @vmod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmod_d(v2i64 _1, v2i64 _2) { return __lsx_vmod_d(_1, _2); } ++// CHECK-LABEL: @vmod_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vmod_bu(v16u8 _1, v16u8 _2) { return __lsx_vmod_bu(_1, _2); } ++// CHECK-LABEL: @vmod_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vmod_hu(v8u16 _1, v8u16 _2) { return __lsx_vmod_hu(_1, _2); } ++// CHECK-LABEL: @vmod_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vmod_wu(v4u32 _1, v4u32 _2) { return __lsx_vmod_wu(_1, _2); } ++// CHECK-LABEL: @vmod_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmod_du(v2u64 _1, v2u64 _2) { return __lsx_vmod_du(_1, _2); } ++// CHECK-LABEL: @vreplve_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> [[_1:%.*]], i32 [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vreplve_b(v16i8 _1, int _2) { return __lsx_vreplve_b(_1, _2); } ++// CHECK-LABEL: @vreplve_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> [[_1:%.*]], i32 [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vreplve_h(v8i16 _1, int _2) { return __lsx_vreplve_h(_1, _2); } ++// CHECK-LABEL: @vreplve_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> [[_1:%.*]], i32 [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vreplve_w(v4i32 _1, int _2) { return __lsx_vreplve_w(_1, _2); } ++// CHECK-LABEL: @vreplve_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> [[_1:%.*]], i32 [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vreplve_d(v2i64 _1, int _2) { return __lsx_vreplve_d(_1, _2); } ++// CHECK-LABEL: @vreplvei_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vreplvei_b(v16i8 _1) { return __lsx_vreplvei_b(_1, 1); } ++// CHECK-LABEL: @vreplvei_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vreplvei_h(v8i16 _1) { return 
__lsx_vreplvei_h(_1, 1); } ++// CHECK-LABEL: @vreplvei_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vreplvei_w(v4i32 _1) { return __lsx_vreplvei_w(_1, 1); } ++// CHECK-LABEL: @vreplvei_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vreplvei_d(v2i64 _1) { return __lsx_vreplvei_d(_1, 1); } ++// CHECK-LABEL: @vpickev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vpickev_b(v16i8 _1, v16i8 _2) { return __lsx_vpickev_b(_1, _2); } ++// CHECK-LABEL: @vpickev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vpickev_h(v8i16 _1, v8i16 _2) { return __lsx_vpickev_h(_1, _2); } ++// CHECK-LABEL: @vpickev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vpickev_w(v4i32 _1, v4i32 _2) { return __lsx_vpickev_w(_1, _2); } ++// CHECK-LABEL: @vpickev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vpickev_d(v2i64 _1, v2i64 _2) { return __lsx_vpickev_d(_1, _2); } ++// CHECK-LABEL: @vpickod_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vpickod_b(v16i8 _1, v16i8 _2) { return __lsx_vpickod_b(_1, _2); } ++// CHECK-LABEL: @vpickod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vpickod_h(v8i16 _1, v8i16 _2) { return __lsx_vpickod_h(_1, _2); } ++// CHECK-LABEL: @vpickod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vpickod_w(v4i32 _1, v4i32 _2) { return __lsx_vpickod_w(_1, _2); } ++// CHECK-LABEL: @vpickod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vpickod_d(v2i64 _1, v2i64 _2) { return __lsx_vpickod_d(_1, _2); } ++// CHECK-LABEL: @vilvh_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vilvh_b(v16i8 _1, v16i8 _2) { return __lsx_vilvh_b(_1, _2); } ++// CHECK-LABEL: @vilvh_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vilvh_h(v8i16 _1, v8i16 _2) { return __lsx_vilvh_h(_1, _2); } 
++// CHECK-LABEL: @vilvh_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vilvh_w(v4i32 _1, v4i32 _2) { return __lsx_vilvh_w(_1, _2); } ++// CHECK-LABEL: @vilvh_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vilvh_d(v2i64 _1, v2i64 _2) { return __lsx_vilvh_d(_1, _2); } ++// CHECK-LABEL: @vilvl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vilvl_b(v16i8 _1, v16i8 _2) { return __lsx_vilvl_b(_1, _2); } ++// CHECK-LABEL: @vilvl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vilvl_h(v8i16 _1, v8i16 _2) { return __lsx_vilvl_h(_1, _2); } ++// CHECK-LABEL: @vilvl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vilvl_w(v4i32 _1, v4i32 _2) { return __lsx_vilvl_w(_1, _2); } ++// CHECK-LABEL: @vilvl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vilvl_d(v2i64 _1, v2i64 _2) { return __lsx_vilvl_d(_1, _2); } ++// CHECK-LABEL: @vpackev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vpackev_b(v16i8 _1, v16i8 _2) { return __lsx_vpackev_b(_1, _2); } ++// CHECK-LABEL: @vpackev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vpackev_h(v8i16 _1, v8i16 _2) { return __lsx_vpackev_h(_1, _2); } ++// CHECK-LABEL: @vpackev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vpackev_w(v4i32 _1, v4i32 _2) { return __lsx_vpackev_w(_1, _2); } ++// CHECK-LABEL: @vpackev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vpackev_d(v2i64 _1, v2i64 _2) { return __lsx_vpackev_d(_1, _2); } ++// CHECK-LABEL: @vpackod_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vpackod_b(v16i8 _1, v16i8 _2) { return __lsx_vpackod_b(_1, _2); } ++// CHECK-LABEL: @vpackod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vpackod_h(v8i16 _1, v8i16 _2) { return __lsx_vpackod_h(_1, _2); } ++// 
CHECK-LABEL: @vpackod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vpackod_w(v4i32 _1, v4i32 _2) { return __lsx_vpackod_w(_1, _2); } ++// CHECK-LABEL: @vpackod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vpackod_d(v2i64 _1, v2i64 _2) { return __lsx_vpackod_d(_1, _2); } ++// CHECK-LABEL: @vshuf_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vshuf_h(v8i16 _1, v8i16 _2, v8i16 _3) { ++ return __lsx_vshuf_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vshuf_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vshuf_w(v4i32 _1, v4i32 _2, v4i32 _3) { ++ return __lsx_vshuf_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vshuf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vshuf_d(v2i64 _1, v2i64 _2, v2i64 _3) { ++ return __lsx_vshuf_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vand_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vand_v(v16u8 _1, v16u8 _2) { return __lsx_vand_v(_1, _2); } ++// CHECK-LABEL: @vandi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vandi_b(v16u8 _1) { return __lsx_vandi_b(_1, 1); } ++// CHECK-LABEL: @vor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vor_v(v16u8 _1, v16u8 _2) { return __lsx_vor_v(_1, _2); } ++// CHECK-LABEL: @vori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vori_b(v16u8 _1) { return __lsx_vori_b(_1, 1); } ++// CHECK-LABEL: @vnor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vnor_v(v16u8 _1, v16u8 _2) { return __lsx_vnor_v(_1, _2); } ++// CHECK-LABEL: @vnori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vnori_b(v16u8 _1) { return __lsx_vnori_b(_1, 1); } ++// CHECK-LABEL: @vxor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vxor_v(v16u8 _1, v16u8 _2) { return __lsx_vxor_v(_1, _2); } ++// CHECK-LABEL: @vxori_b( 
++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vxori_b(v16u8 _1) { return __lsx_vxori_b(_1, 1); } ++// CHECK-LABEL: @vbitsel_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vbitsel_v(v16u8 _1, v16u8 _2, v16u8 _3) { ++ return __lsx_vbitsel_v(_1, _2, _3); ++} ++// CHECK-LABEL: @vbitseli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vbitseli_b(v16u8 _1, v16u8 _2) { return __lsx_vbitseli_b(_1, _2, 1); } ++// CHECK-LABEL: @vshuf4i_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vshuf4i_b(v16i8 _1) { return __lsx_vshuf4i_b(_1, 1); } ++// CHECK-LABEL: @vshuf4i_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vshuf4i_h(v8i16 _1) { return __lsx_vshuf4i_h(_1, 1); } ++// CHECK-LABEL: @vshuf4i_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vshuf4i_w(v4i32 _1) { return __lsx_vshuf4i_w(_1, 1); } ++// CHECK-LABEL: @vreplgr2vr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vreplgr2vr_b(int _1) { return __lsx_vreplgr2vr_b(_1); } ++// CHECK-LABEL: @vreplgr2vr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vreplgr2vr_h(int _1) { return __lsx_vreplgr2vr_h(_1); } ++// CHECK-LABEL: @vreplgr2vr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vreplgr2vr_w(int _1) { return __lsx_vreplgr2vr_w(_1); } ++// CHECK-LABEL: @vreplgr2vr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vreplgr2vr_d(long _1) { return __lsx_vreplgr2vr_d(_1); } ++// CHECK-LABEL: @vpcnt_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vpcnt_b(v16i8 _1) { return __lsx_vpcnt_b(_1); } ++// CHECK-LABEL: @vpcnt_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vpcnt_h(v8i16 _1) { return __lsx_vpcnt_h(_1); } ++// CHECK-LABEL: @vpcnt_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vpcnt_w(v4i32 _1) { return __lsx_vpcnt_w(_1); } ++// 
CHECK-LABEL: @vpcnt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vpcnt_d(v2i64 _1) { return __lsx_vpcnt_d(_1); } ++// CHECK-LABEL: @vclo_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vclo_b(v16i8 _1) { return __lsx_vclo_b(_1); } ++// CHECK-LABEL: @vclo_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vclo_h(v8i16 _1) { return __lsx_vclo_h(_1); } ++// CHECK-LABEL: @vclo_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vclo_w(v4i32 _1) { return __lsx_vclo_w(_1); } ++// CHECK-LABEL: @vclo_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vclo_d(v2i64 _1) { return __lsx_vclo_d(_1); } ++// CHECK-LABEL: @vclz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vclz_b(v16i8 _1) { return __lsx_vclz_b(_1); } ++// CHECK-LABEL: @vclz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vclz_h(v8i16 _1) { return __lsx_vclz_h(_1); } ++// CHECK-LABEL: @vclz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vclz_w(v4i32 _1) { return __lsx_vclz_w(_1); } ++// CHECK-LABEL: @vclz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vclz_d(v2i64 _1) { return __lsx_vclz_d(_1); } ++// CHECK-LABEL: @vpickve2gr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int vpickve2gr_b(v16i8 _1) { return __lsx_vpickve2gr_b(_1, 1); } ++// CHECK-LABEL: @vpickve2gr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int vpickve2gr_h(v8i16 _1) { return __lsx_vpickve2gr_h(_1, 1); } ++// CHECK-LABEL: @vpickve2gr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int vpickve2gr_w(v4i32 _1) { return __lsx_vpickve2gr_w(_1, 1); } ++// CHECK-LABEL: @vpickve2gr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i64 [[TMP0]] ++// ++long vpickve2gr_d(v2i64 _1) { return __lsx_vpickve2gr_d(_1, 1); } ++// CHECK-LABEL: @vpickve2gr_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// 
++unsigned int vpickve2gr_bu(v16i8 _1) { return __lsx_vpickve2gr_bu(_1, 1); } ++// CHECK-LABEL: @vpickve2gr_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++unsigned int vpickve2gr_hu(v8i16 _1) { return __lsx_vpickve2gr_hu(_1, 1); } ++// CHECK-LABEL: @vpickve2gr_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++unsigned int vpickve2gr_wu(v4i32 _1) { return __lsx_vpickve2gr_wu(_1, 1); } ++// CHECK-LABEL: @vpickve2gr_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i64 [[TMP0]] ++// ++unsigned long int vpickve2gr_du(v2i64 _1) { return __lsx_vpickve2gr_du(_1, 1); } ++// CHECK-LABEL: @vinsgr2vr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> [[_1:%.*]], i32 1, i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vinsgr2vr_b(v16i8 _1) { return __lsx_vinsgr2vr_b(_1, 1, 1); } ++// CHECK-LABEL: @vinsgr2vr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> [[_1:%.*]], i32 1, i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vinsgr2vr_h(v8i16 _1) { return __lsx_vinsgr2vr_h(_1, 1, 1); } ++// CHECK-LABEL: @vinsgr2vr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> [[_1:%.*]], i32 1, i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vinsgr2vr_w(v4i32 _1) { return __lsx_vinsgr2vr_w(_1, 1, 1); } ++// CHECK-LABEL: @vinsgr2vr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> [[_1:%.*]], i64 1, i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vinsgr2vr_d(v2i64 _1) { return __lsx_vinsgr2vr_d(_1, 1, 1); } ++// CHECK-LABEL: @vfadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfadd_s(v4f32 _1, v4f32 _2) { return __lsx_vfadd_s(_1, _2); } ++// CHECK-LABEL: @vfadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfadd_d(v2f64 _1, v2f64 _2) { return __lsx_vfadd_d(_1, _2); } ++// CHECK-LABEL: @vfsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfsub_s(v4f32 _1, v4f32 _2) { return __lsx_vfsub_s(_1, _2); } ++// CHECK-LABEL: @vfsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfsub_d(v2f64 _1, v2f64 _2) { return __lsx_vfsub_d(_1, _2); } ++// CHECK-LABEL: @vfmul_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfmul_s(v4f32 
_1, v4f32 _2) { return __lsx_vfmul_s(_1, _2); } ++// CHECK-LABEL: @vfmul_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfmul_d(v2f64 _1, v2f64 _2) { return __lsx_vfmul_d(_1, _2); } ++// CHECK-LABEL: @vfdiv_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfdiv_s(v4f32 _1, v4f32 _2) { return __lsx_vfdiv_s(_1, _2); } ++// CHECK-LABEL: @vfdiv_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfdiv_d(v2f64 _1, v2f64 _2) { return __lsx_vfdiv_d(_1, _2); } ++// CHECK-LABEL: @vfcvt_h_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vfcvt_h_s(v4f32 _1, v4f32 _2) { return __lsx_vfcvt_h_s(_1, _2); } ++// CHECK-LABEL: @vfcvt_s_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfcvt_s_d(v2f64 _1, v2f64 _2) { return __lsx_vfcvt_s_d(_1, _2); } ++// CHECK-LABEL: @vfmin_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfmin_s(v4f32 _1, v4f32 _2) { return __lsx_vfmin_s(_1, _2); } ++// CHECK-LABEL: @vfmin_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfmin_d(v2f64 _1, v2f64 _2) { return __lsx_vfmin_d(_1, _2); } ++// CHECK-LABEL: @vfmina_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfmina_s(v4f32 _1, v4f32 _2) { return __lsx_vfmina_s(_1, _2); } ++// CHECK-LABEL: @vfmina_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfmina_d(v2f64 _1, v2f64 _2) { return __lsx_vfmina_d(_1, _2); } ++// CHECK-LABEL: @vfmax_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfmax_s(v4f32 _1, v4f32 _2) { return __lsx_vfmax_s(_1, _2); } ++// CHECK-LABEL: @vfmax_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfmax_d(v2f64 _1, v2f64 _2) { return __lsx_vfmax_d(_1, _2); } ++// CHECK-LABEL: @vfmaxa_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> [[_1:%.*]], <4 x float> 
[[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfmaxa_s(v4f32 _1, v4f32 _2) { return __lsx_vfmaxa_s(_1, _2); } ++// CHECK-LABEL: @vfmaxa_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfmaxa_d(v2f64 _1, v2f64 _2) { return __lsx_vfmaxa_d(_1, _2); } ++// CHECK-LABEL: @vfclass_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfclass_s(v4f32 _1) { return __lsx_vfclass_s(_1); } ++// CHECK-LABEL: @vfclass_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfclass_d(v2f64 _1) { return __lsx_vfclass_d(_1); } ++// CHECK-LABEL: @vfsqrt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfsqrt_s(v4f32 _1) { return __lsx_vfsqrt_s(_1); } ++// CHECK-LABEL: @vfsqrt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfsqrt_d(v2f64 _1) { return __lsx_vfsqrt_d(_1); } ++// CHECK-LABEL: @vfrecip_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfrecip_s(v4f32 _1) { return __lsx_vfrecip_s(_1); } ++// CHECK-LABEL: @vfrecip_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfrecip_d(v2f64 _1) { return __lsx_vfrecip_d(_1); } ++// CHECK-LABEL: @vfrint_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfrint_s(v4f32 _1) { return __lsx_vfrint_s(_1); } ++// CHECK-LABEL: @vfrint_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfrint_d(v2f64 _1) { return __lsx_vfrint_d(_1); } ++// CHECK-LABEL: @vfrsqrt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfrsqrt_s(v4f32 _1) { return __lsx_vfrsqrt_s(_1); } ++// CHECK-LABEL: @vfrsqrt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfrsqrt_d(v2f64 _1) { return __lsx_vfrsqrt_d(_1); } ++// CHECK-LABEL: @vflogb_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vflogb_s(v4f32 _1) { return __lsx_vflogb_s(_1); } ++// CHECK-LABEL: @vflogb_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> [[_1:%.*]]) ++// 
CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vflogb_d(v2f64 _1) { return __lsx_vflogb_d(_1); } ++// CHECK-LABEL: @vfcvth_s_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfcvth_s_h(v8i16 _1) { return __lsx_vfcvth_s_h(_1); } ++// CHECK-LABEL: @vfcvth_d_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfcvth_d_s(v4f32 _1) { return __lsx_vfcvth_d_s(_1); } ++// CHECK-LABEL: @vfcvtl_s_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfcvtl_s_h(v8i16 _1) { return __lsx_vfcvtl_s_h(_1); } ++// CHECK-LABEL: @vfcvtl_d_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfcvtl_d_s(v4f32 _1) { return __lsx_vfcvtl_d_s(_1); } ++// CHECK-LABEL: @vftint_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftint_w_s(v4f32 _1) { return __lsx_vftint_w_s(_1); } ++// CHECK-LABEL: @vftint_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftint_l_d(v2f64 _1) { return __lsx_vftint_l_d(_1); } ++// CHECK-LABEL: @vftint_wu_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vftint_wu_s(v4f32 _1) { return __lsx_vftint_wu_s(_1); } ++// CHECK-LABEL: @vftint_lu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vftint_lu_d(v2f64 _1) { return __lsx_vftint_lu_d(_1); } ++// CHECK-LABEL: @vftintrz_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftintrz_w_s(v4f32 _1) { return __lsx_vftintrz_w_s(_1); } ++// CHECK-LABEL: @vftintrz_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrz_l_d(v2f64 _1) { return __lsx_vftintrz_l_d(_1); } ++// CHECK-LABEL: @vftintrz_wu_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vftintrz_wu_s(v4f32 _1) { return __lsx_vftintrz_wu_s(_1); } ++// CHECK-LABEL: @vftintrz_lu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vftintrz_lu_d(v2f64 _1) { return __lsx_vftintrz_lu_d(_1); } ++// CHECK-LABEL: @vffint_s_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> 
[[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vffint_s_w(v4i32 _1) { return __lsx_vffint_s_w(_1); } ++// CHECK-LABEL: @vffint_d_l( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vffint_d_l(v2i64 _1) { return __lsx_vffint_d_l(_1); } ++// CHECK-LABEL: @vffint_s_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vffint_s_wu(v4u32 _1) { return __lsx_vffint_s_wu(_1); } ++// CHECK-LABEL: @vffint_d_lu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vffint_d_lu(v2u64 _1) { return __lsx_vffint_d_lu(_1); } ++// CHECK-LABEL: @vandn_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vandn_v(v16u8 _1, v16u8 _2) { return __lsx_vandn_v(_1, _2); } ++// CHECK-LABEL: @vneg_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vneg_b(v16i8 _1) { return __lsx_vneg_b(_1); } ++// CHECK-LABEL: @vneg_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vneg_h(v8i16 _1) { return __lsx_vneg_h(_1); } ++// CHECK-LABEL: @vneg_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vneg_w(v4i32 _1) { return __lsx_vneg_w(_1); } ++// CHECK-LABEL: @vneg_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vneg_d(v2i64 _1) { return __lsx_vneg_d(_1); } ++// CHECK-LABEL: @vmuh_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmuh_b(v16i8 _1, v16i8 _2) { return __lsx_vmuh_b(_1, _2); } ++// CHECK-LABEL: @vmuh_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmuh_h(v8i16 _1, v8i16 _2) { return __lsx_vmuh_h(_1, _2); } ++// CHECK-LABEL: @vmuh_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmuh_w(v4i32 _1, v4i32 _2) { return __lsx_vmuh_w(_1, _2); } ++// CHECK-LABEL: @vmuh_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmuh_d(v2i64 _1, v2i64 _2) { return __lsx_vmuh_d(_1, _2); } ++// CHECK-LABEL: @vmuh_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> [[_1:%.*]], <16 x i8> 
[[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vmuh_bu(v16u8 _1, v16u8 _2) { return __lsx_vmuh_bu(_1, _2); } ++// CHECK-LABEL: @vmuh_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vmuh_hu(v8u16 _1, v8u16 _2) { return __lsx_vmuh_hu(_1, _2); } ++// CHECK-LABEL: @vmuh_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vmuh_wu(v4u32 _1, v4u32 _2) { return __lsx_vmuh_wu(_1, _2); } ++// CHECK-LABEL: @vmuh_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmuh_du(v2u64 _1, v2u64 _2) { return __lsx_vmuh_du(_1, _2); } ++// CHECK-LABEL: @vsllwil_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsllwil_h_b(v16i8 _1) { return __lsx_vsllwil_h_b(_1, 1); } ++// CHECK-LABEL: @vsllwil_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsllwil_w_h(v8i16 _1) { return __lsx_vsllwil_w_h(_1, 1); } ++// CHECK-LABEL: @vsllwil_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsllwil_d_w(v4i32 _1) { return __lsx_vsllwil_d_w(_1, 1); } ++// CHECK-LABEL: @vsllwil_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vsllwil_hu_bu(v16u8 _1) { return __lsx_vsllwil_hu_bu(_1, 1); } ++// CHECK-LABEL: @vsllwil_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vsllwil_wu_hu(v8u16 _1) { return __lsx_vsllwil_wu_hu(_1, 1); } ++// CHECK-LABEL: @vsllwil_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vsllwil_du_wu(v4u32 _1) { return __lsx_vsllwil_du_wu(_1, 1); } ++// CHECK-LABEL: @vsran_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsran_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsran_b_h(_1, _2); } ++// CHECK-LABEL: @vsran_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsran_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsran_h_w(_1, _2); } ++// CHECK-LABEL: @vsran_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 
vsran_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsran_w_d(_1, _2); } ++// CHECK-LABEL: @vssran_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssran_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssran_b_h(_1, _2); } ++// CHECK-LABEL: @vssran_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vssran_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssran_h_w(_1, _2); } ++// CHECK-LABEL: @vssran_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssran_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssran_w_d(_1, _2); } ++// CHECK-LABEL: @vssran_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssran_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssran_bu_h(_1, _2); } ++// CHECK-LABEL: @vssran_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssran_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssran_hu_w(_1, _2); } ++// CHECK-LABEL: @vssran_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssran_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssran_wu_d(_1, _2); } ++// CHECK-LABEL: @vsrarn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrarn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrarn_b_h(_1, _2); } ++// CHECK-LABEL: @vsrarn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrarn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrarn_h_w(_1, _2); } ++// CHECK-LABEL: @vsrarn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrarn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrarn_w_d(_1, _2); } ++// CHECK-LABEL: @vssrarn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssrarn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrarn_b_h(_1, _2); } ++// CHECK-LABEL: @vssrarn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vssrarn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrarn_h_w(_1, _2); } ++// CHECK-LABEL: @vssrarn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> 
@llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssrarn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrarn_w_d(_1, _2); } ++// CHECK-LABEL: @vssrarn_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssrarn_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrarn_bu_h(_1, _2); } ++// CHECK-LABEL: @vssrarn_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssrarn_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrarn_hu_w(_1, _2); } ++// CHECK-LABEL: @vssrarn_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssrarn_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrarn_wu_d(_1, _2); } ++// CHECK-LABEL: @vsrln_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrln_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrln_b_h(_1, _2); } ++// CHECK-LABEL: @vsrln_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrln_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrln_h_w(_1, _2); } ++// CHECK-LABEL: @vsrln_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrln_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrln_w_d(_1, _2); } ++// CHECK-LABEL: @vssrln_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssrln_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrln_bu_h(_1, _2); } ++// CHECK-LABEL: @vssrln_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssrln_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrln_hu_w(_1, _2); } ++// CHECK-LABEL: @vssrln_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssrln_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrln_wu_d(_1, _2); } ++// CHECK-LABEL: @vsrlrn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrlrn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrlrn_b_h(_1, _2); } ++// CHECK-LABEL: @vsrlrn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrlrn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrlrn_h_w(_1, _2); 
} ++// CHECK-LABEL: @vsrlrn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrlrn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrlrn_w_d(_1, _2); } ++// CHECK-LABEL: @vssrlrn_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssrlrn_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrlrn_bu_h(_1, _2); } ++// CHECK-LABEL: @vssrlrn_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssrlrn_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrlrn_hu_w(_1, _2); } ++// CHECK-LABEL: @vssrlrn_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssrlrn_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrlrn_wu_d(_1, _2); } ++// CHECK-LABEL: @vfrstpi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vfrstpi_b(v16i8 _1, v16i8 _2) { return __lsx_vfrstpi_b(_1, _2, 1); } ++// CHECK-LABEL: @vfrstpi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vfrstpi_h(v8i16 _1, v8i16 _2) { return __lsx_vfrstpi_h(_1, _2, 1); } ++// CHECK-LABEL: @vfrstp_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vfrstp_b(v16i8 _1, v16i8 _2, v16i8 _3) { ++ return __lsx_vfrstp_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vfrstp_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vfrstp_h(v8i16 _1, v8i16 _2, v8i16 _3) { ++ return __lsx_vfrstp_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vshuf4i_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vshuf4i_d(v2i64 _1, v2i64 _2) { return __lsx_vshuf4i_d(_1, _2, 1); } ++// CHECK-LABEL: @vbsrl_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vbsrl_v(v16i8 _1) { return __lsx_vbsrl_v(_1, 1); } ++// CHECK-LABEL: @vbsll_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vbsll_v(v16i8 _1) { return __lsx_vbsll_v(_1, 1); } ++// CHECK-LABEL: @vextrins_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 
1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vextrins_b(v16i8 _1, v16i8 _2) { return __lsx_vextrins_b(_1, _2, 1); } ++// CHECK-LABEL: @vextrins_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vextrins_h(v8i16 _1, v8i16 _2) { return __lsx_vextrins_h(_1, _2, 1); } ++// CHECK-LABEL: @vextrins_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vextrins_w(v4i32 _1, v4i32 _2) { return __lsx_vextrins_w(_1, _2, 1); } ++// CHECK-LABEL: @vextrins_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vextrins_d(v2i64 _1, v2i64 _2) { return __lsx_vextrins_d(_1, _2, 1); } ++// CHECK-LABEL: @vmskltz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmskltz_b(v16i8 _1) { return __lsx_vmskltz_b(_1); } ++// CHECK-LABEL: @vmskltz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmskltz_h(v8i16 _1) { return __lsx_vmskltz_h(_1); } ++// CHECK-LABEL: @vmskltz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmskltz_w(v4i32 _1) { return __lsx_vmskltz_w(_1); } ++// CHECK-LABEL: @vmskltz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmskltz_d(v2i64 _1) { return __lsx_vmskltz_d(_1); } ++// CHECK-LABEL: @vsigncov_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsigncov_b(v16i8 _1, v16i8 _2) { return __lsx_vsigncov_b(_1, _2); } ++// CHECK-LABEL: @vsigncov_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsigncov_h(v8i16 _1, v8i16 _2) { return __lsx_vsigncov_h(_1, _2); } ++// CHECK-LABEL: @vsigncov_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsigncov_w(v4i32 _1, v4i32 _2) { return __lsx_vsigncov_w(_1, _2); } ++// CHECK-LABEL: @vsigncov_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsigncov_d(v2i64 _1, v2i64 _2) { return __lsx_vsigncov_d(_1, _2); } ++// CHECK-LABEL: @vfmadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) ++// CHECK-NEXT: 
ret <4 x float> [[TMP0]] ++// ++v4f32 vfmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { ++ return __lsx_vfmadd_s(_1, _2, _3); ++} ++// CHECK-LABEL: @vfmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { ++ return __lsx_vfmadd_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vfmsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { ++ return __lsx_vfmsub_s(_1, _2, _3); ++} ++// CHECK-LABEL: @vfmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { ++ return __lsx_vfmsub_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vfnmadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfnmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { ++ return __lsx_vfnmadd_s(_1, _2, _3); ++} ++// CHECK-LABEL: @vfnmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfnmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { ++ return __lsx_vfnmadd_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vfnmsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfnmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { ++ return __lsx_vfnmsub_s(_1, _2, _3); ++} ++// CHECK-LABEL: @vfnmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfnmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { ++ return __lsx_vfnmsub_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vftintrne_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftintrne_w_s(v4f32 _1) { return __lsx_vftintrne_w_s(_1); } ++// CHECK-LABEL: @vftintrne_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrne_l_d(v2f64 _1) { return __lsx_vftintrne_l_d(_1); } ++// CHECK-LABEL: @vftintrp_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftintrp_w_s(v4f32 _1) { return __lsx_vftintrp_w_s(_1); } ++// CHECK-LABEL: @vftintrp_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> 
@llvm.loongarch.lsx.vftintrp.l.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrp_l_d(v2f64 _1) { return __lsx_vftintrp_l_d(_1); } ++// CHECK-LABEL: @vftintrm_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftintrm_w_s(v4f32 _1) { return __lsx_vftintrm_w_s(_1); } ++// CHECK-LABEL: @vftintrm_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrm_l_d(v2f64 _1) { return __lsx_vftintrm_l_d(_1); } ++// CHECK-LABEL: @vftint_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftint_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftint_w_d(_1, _2); } ++// CHECK-LABEL: @vffint_s_l( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vffint_s_l(v2i64 _1, v2i64 _2) { return __lsx_vffint_s_l(_1, _2); } ++// CHECK-LABEL: @vftintrz_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftintrz_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrz_w_d(_1, _2); } ++// CHECK-LABEL: @vftintrp_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftintrp_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrp_w_d(_1, _2); } ++// CHECK-LABEL: @vftintrm_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftintrm_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrm_w_d(_1, _2); } ++// CHECK-LABEL: @vftintrne_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftintrne_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrne_w_d(_1, _2); } ++// CHECK-LABEL: @vftintl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintl_l_s(v4f32 _1) { return __lsx_vftintl_l_s(_1); } ++// CHECK-LABEL: @vftinth_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftinth_l_s(v4f32 _1) { return __lsx_vftinth_l_s(_1); } ++// CHECK-LABEL: @vffinth_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vffinth_d_w(v4i32 _1) { return __lsx_vffinth_d_w(_1); } ++// CHECK-LABEL: @vffintl_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> 
@llvm.loongarch.lsx.vffintl.d.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vffintl_d_w(v4i32 _1) { return __lsx_vffintl_d_w(_1); } ++// CHECK-LABEL: @vftintrzl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrzl_l_s(v4f32 _1) { return __lsx_vftintrzl_l_s(_1); } ++// CHECK-LABEL: @vftintrzh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrzh_l_s(v4f32 _1) { return __lsx_vftintrzh_l_s(_1); } ++// CHECK-LABEL: @vftintrpl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrpl_l_s(v4f32 _1) { return __lsx_vftintrpl_l_s(_1); } ++// CHECK-LABEL: @vftintrph_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrph_l_s(v4f32 _1) { return __lsx_vftintrph_l_s(_1); } ++// CHECK-LABEL: @vftintrml_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrml_l_s(v4f32 _1) { return __lsx_vftintrml_l_s(_1); } ++// CHECK-LABEL: @vftintrmh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrmh_l_s(v4f32 _1) { return __lsx_vftintrmh_l_s(_1); } ++// CHECK-LABEL: @vftintrnel_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrnel_l_s(v4f32 _1) { return __lsx_vftintrnel_l_s(_1); } ++// CHECK-LABEL: @vftintrneh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrneh_l_s(v4f32 _1) { return __lsx_vftintrneh_l_s(_1); } ++// CHECK-LABEL: @vfrintrne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> ++// CHECK-NEXT: ret <4 x i32> [[TMP1]] ++// ++v4i32 vfrintrne_s(v4f32 _1) { return __lsx_vfrintrne_s(_1); } ++// CHECK-LABEL: @vfrintrne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> ++// CHECK-NEXT: ret <2 x i64> [[TMP1]] ++// ++v2i64 vfrintrne_d(v2f64 _1) { return __lsx_vfrintrne_d(_1); } ++// CHECK-LABEL: @vfrintrz_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> ++// CHECK-NEXT: ret <4 x i32> [[TMP1]] ++// ++v4i32 vfrintrz_s(v4f32 _1) { return __lsx_vfrintrz_s(_1); } ++// CHECK-LABEL: @vfrintrz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = 
tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> ++// CHECK-NEXT: ret <2 x i64> [[TMP1]] ++// ++v2i64 vfrintrz_d(v2f64 _1) { return __lsx_vfrintrz_d(_1); } ++// CHECK-LABEL: @vfrintrp_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> ++// CHECK-NEXT: ret <4 x i32> [[TMP1]] ++// ++v4i32 vfrintrp_s(v4f32 _1) { return __lsx_vfrintrp_s(_1); } ++// CHECK-LABEL: @vfrintrp_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> ++// CHECK-NEXT: ret <2 x i64> [[TMP1]] ++// ++v2i64 vfrintrp_d(v2f64 _1) { return __lsx_vfrintrp_d(_1); } ++// CHECK-LABEL: @vfrintrm_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> ++// CHECK-NEXT: ret <4 x i32> [[TMP1]] ++// ++v4i32 vfrintrm_s(v4f32 _1) { return __lsx_vfrintrm_s(_1); } ++// CHECK-LABEL: @vfrintrm_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> ++// CHECK-NEXT: ret <2 x i64> [[TMP1]] ++// ++v2i64 vfrintrm_d(v2f64 _1) { return __lsx_vfrintrm_d(_1); } ++// CHECK-LABEL: @vstelm_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1, i32 1) ++// CHECK-NEXT: ret void ++// ++void vstelm_b(v16i8 _1, void *_2) { return __lsx_vstelm_b(_1, _2, 1, 1); } ++// CHECK-LABEL: @vstelm_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> [[_1:%.*]], ptr [[_2:%.*]], i32 2, i32 1) ++// CHECK-NEXT: ret void ++// ++void vstelm_h(v8i16 _1, void *_2) { return __lsx_vstelm_h(_1, _2, 2, 1); } ++// CHECK-LABEL: @vstelm_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> [[_1:%.*]], ptr [[_2:%.*]], i32 4, i32 1) ++// CHECK-NEXT: ret void ++// ++void vstelm_w(v4i32 _1, void *_2) { return __lsx_vstelm_w(_1, _2, 4, 1); } ++// CHECK-LABEL: @vstelm_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> [[_1:%.*]], ptr [[_2:%.*]], i32 8, i32 1) ++// CHECK-NEXT: ret void ++// ++void vstelm_d(v2i64 _1, void *_2) { return __lsx_vstelm_d(_1, _2, 8, 1); } ++// CHECK-LABEL: @vaddwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vaddwev_d_w(_1, _2); } ++// CHECK-LABEL: @vaddwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vaddwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vaddwev_w_h(_1, _2); } ++// CHECK-LABEL: @vaddwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// 
CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vaddwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vaddwev_h_b(_1, _2); } ++// CHECK-LABEL: @vaddwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vaddwod_d_w(_1, _2); } ++// CHECK-LABEL: @vaddwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vaddwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vaddwod_w_h(_1, _2); } ++// CHECK-LABEL: @vaddwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vaddwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vaddwod_h_b(_1, _2); } ++// CHECK-LABEL: @vaddwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vaddwev_d_wu(_1, _2); } ++// CHECK-LABEL: @vaddwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vaddwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vaddwev_w_hu(_1, _2); } ++// CHECK-LABEL: @vaddwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vaddwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vaddwev_h_bu(_1, _2); } ++// CHECK-LABEL: @vaddwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vaddwod_d_wu(_1, _2); } ++// CHECK-LABEL: @vaddwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vaddwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vaddwod_w_hu(_1, _2); } ++// CHECK-LABEL: @vaddwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vaddwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vaddwod_h_bu(_1, _2); } ++// CHECK-LABEL: @vaddwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwev_d_wu_w(v4u32 _1, v4i32 _2) { ++ return __lsx_vaddwev_d_wu_w(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vaddwev_w_hu_h(v8u16 _1, v8i16 _2) { ++ return __lsx_vaddwev_w_hu_h(_1, _2); ++} 
++// CHECK-LABEL: @vaddwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vaddwev_h_bu_b(v16u8 _1, v16i8 _2) { ++ return __lsx_vaddwev_h_bu_b(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwod_d_wu_w(v4u32 _1, v4i32 _2) { ++ return __lsx_vaddwod_d_wu_w(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vaddwod_w_hu_h(v8u16 _1, v8i16 _2) { ++ return __lsx_vaddwod_w_hu_h(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vaddwod_h_bu_b(v16u8 _1, v16i8 _2) { ++ return __lsx_vaddwod_h_bu_b(_1, _2); ++} ++// CHECK-LABEL: @vsubwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vsubwev_d_w(_1, _2); } ++// CHECK-LABEL: @vsubwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsubwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vsubwev_w_h(_1, _2); } ++// CHECK-LABEL: @vsubwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsubwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vsubwev_h_b(_1, _2); } ++// CHECK-LABEL: @vsubwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vsubwod_d_w(_1, _2); } ++// CHECK-LABEL: @vsubwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsubwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vsubwod_w_h(_1, _2); } ++// CHECK-LABEL: @vsubwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsubwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vsubwod_h_b(_1, _2); } ++// CHECK-LABEL: @vsubwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vsubwev_d_wu(_1, _2); } ++// CHECK-LABEL: @vsubwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x 
i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsubwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vsubwev_w_hu(_1, _2); } ++// CHECK-LABEL: @vsubwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsubwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vsubwev_h_bu(_1, _2); } ++// CHECK-LABEL: @vsubwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vsubwod_d_wu(_1, _2); } ++// CHECK-LABEL: @vsubwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsubwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vsubwod_w_hu(_1, _2); } ++// CHECK-LABEL: @vsubwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsubwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vsubwod_h_bu(_1, _2); } ++// CHECK-LABEL: @vaddwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vaddwev_q_d(_1, _2); } ++// CHECK-LABEL: @vaddwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vaddwod_q_d(_1, _2); } ++// CHECK-LABEL: @vaddwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vaddwev_q_du(_1, _2); } ++// CHECK-LABEL: @vaddwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vaddwod_q_du(_1, _2); } ++// CHECK-LABEL: @vsubwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vsubwev_q_d(_1, _2); } ++// CHECK-LABEL: @vsubwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vsubwod_q_d(_1, _2); } ++// CHECK-LABEL: @vsubwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 
vsubwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vsubwev_q_du(_1, _2); } ++// CHECK-LABEL: @vsubwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vsubwod_q_du(_1, _2); } ++// CHECK-LABEL: @vaddwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwev_q_du_d(v2u64 _1, v2i64 _2) { ++ return __lsx_vaddwev_q_du_d(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwod_q_du_d(v2u64 _1, v2i64 _2) { ++ return __lsx_vaddwod_q_du_d(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vmulwev_d_w(_1, _2); } ++// CHECK-LABEL: @vmulwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmulwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vmulwev_w_h(_1, _2); } ++// CHECK-LABEL: @vmulwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmulwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vmulwev_h_b(_1, _2); } ++// CHECK-LABEL: @vmulwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vmulwod_d_w(_1, _2); } ++// CHECK-LABEL: @vmulwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmulwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vmulwod_w_h(_1, _2); } ++// CHECK-LABEL: @vmulwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmulwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vmulwod_h_b(_1, _2); } ++// CHECK-LABEL: @vmulwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vmulwev_d_wu(_1, _2); } ++// CHECK-LABEL: @vmulwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmulwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vmulwev_w_hu(_1, _2); } ++// CHECK-LABEL: @vmulwev_h_bu( ++// CHECK-NEXT: entry: 
++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmulwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vmulwev_h_bu(_1, _2); } ++// CHECK-LABEL: @vmulwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vmulwod_d_wu(_1, _2); } ++// CHECK-LABEL: @vmulwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmulwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vmulwod_w_hu(_1, _2); } ++// CHECK-LABEL: @vmulwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmulwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vmulwod_h_bu(_1, _2); } ++// CHECK-LABEL: @vmulwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwev_d_wu_w(v4u32 _1, v4i32 _2) { ++ return __lsx_vmulwev_d_wu_w(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmulwev_w_hu_h(v8u16 _1, v8i16 _2) { ++ return __lsx_vmulwev_w_hu_h(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmulwev_h_bu_b(v16u8 _1, v16i8 _2) { ++ return __lsx_vmulwev_h_bu_b(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwod_d_wu_w(v4u32 _1, v4i32 _2) { ++ return __lsx_vmulwod_d_wu_w(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmulwod_w_hu_h(v8u16 _1, v8i16 _2) { ++ return __lsx_vmulwod_w_hu_h(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmulwod_h_bu_b(v16u8 _1, v16i8 _2) { ++ return __lsx_vmulwod_h_bu_b(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vmulwev_q_d(_1, _2); } ++// CHECK-LABEL: @vmulwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> 
@llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vmulwod_q_d(_1, _2); } ++// CHECK-LABEL: @vmulwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vmulwev_q_du(_1, _2); } ++// CHECK-LABEL: @vmulwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vmulwod_q_du(_1, _2); } ++// CHECK-LABEL: @vmulwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwev_q_du_d(v2u64 _1, v2i64 _2) { ++ return __lsx_vmulwev_q_du_d(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwod_q_du_d(v2u64 _1, v2i64 _2) { ++ return __lsx_vmulwod_q_du_d(_1, _2); ++} ++// CHECK-LABEL: @vhaddw_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vhaddw_q_d(v2i64 _1, v2i64 _2) { return __lsx_vhaddw_q_d(_1, _2); } ++// CHECK-LABEL: @vhaddw_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vhaddw_qu_du(v2u64 _1, v2u64 _2) { return __lsx_vhaddw_qu_du(_1, _2); } ++// CHECK-LABEL: @vhsubw_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vhsubw_q_d(v2i64 _1, v2i64 _2) { return __lsx_vhsubw_q_d(_1, _2); } ++// CHECK-LABEL: @vhsubw_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vhsubw_qu_du(v2u64 _1, v2u64 _2) { return __lsx_vhsubw_qu_du(_1, _2); } ++// CHECK-LABEL: @vmaddwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaddwev_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { ++ return __lsx_vmaddwev_d_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmaddwev_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { ++ return __lsx_vmaddwev_w_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> 
[[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmaddwev_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { ++ return __lsx_vmaddwev_h_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmaddwev_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { ++ return __lsx_vmaddwev_d_wu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vmaddwev_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { ++ return __lsx_vmaddwev_w_hu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vmaddwev_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { ++ return __lsx_vmaddwev_h_bu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaddwod_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { ++ return __lsx_vmaddwod_d_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmaddwod_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { ++ return __lsx_vmaddwod_w_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmaddwod_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { ++ return __lsx_vmaddwod_h_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmaddwod_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { ++ return __lsx_vmaddwod_d_wu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vmaddwod_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { ++ return __lsx_vmaddwod_w_hu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vmaddwod_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { ++ return __lsx_vmaddwod_h_bu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> 
[[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaddwev_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { ++ return __lsx_vmaddwev_d_wu_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmaddwev_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { ++ return __lsx_vmaddwev_w_hu_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmaddwev_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { ++ return __lsx_vmaddwev_h_bu_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaddwod_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { ++ return __lsx_vmaddwod_d_wu_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmaddwod_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { ++ return __lsx_vmaddwod_w_hu_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmaddwod_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { ++ return __lsx_vmaddwod_h_bu_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaddwev_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { ++ return __lsx_vmaddwev_q_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaddwod_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { ++ return __lsx_vmaddwod_q_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmaddwev_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { ++ return __lsx_vmaddwev_q_du(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmaddwod_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { ++ return __lsx_vmaddwod_q_du(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> 
@llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaddwev_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) { ++ return __lsx_vmaddwev_q_du_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaddwod_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) { ++ return __lsx_vmaddwod_q_du_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vrotr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vrotr_b(v16i8 _1, v16i8 _2) { return __lsx_vrotr_b(_1, _2); } ++// CHECK-LABEL: @vrotr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vrotr_h(v8i16 _1, v8i16 _2) { return __lsx_vrotr_h(_1, _2); } ++// CHECK-LABEL: @vrotr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vrotr_w(v4i32 _1, v4i32 _2) { return __lsx_vrotr_w(_1, _2); } ++// CHECK-LABEL: @vrotr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vrotr_d(v2i64 _1, v2i64 _2) { return __lsx_vrotr_d(_1, _2); } ++// CHECK-LABEL: @vadd_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vadd_q(v2i64 _1, v2i64 _2) { return __lsx_vadd_q(_1, _2); } ++// CHECK-LABEL: @vsub_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsub_q(v2i64 _1, v2i64 _2) { return __lsx_vsub_q(_1, _2); } ++// CHECK-LABEL: @vldrepl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vldrepl_b(void *_1) { return __lsx_vldrepl_b(_1, 1); } ++// CHECK-LABEL: @vldrepl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr [[_1:%.*]], i32 2) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vldrepl_h(void *_1) { return __lsx_vldrepl_h(_1, 2); } ++// CHECK-LABEL: @vldrepl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr [[_1:%.*]], i32 4) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vldrepl_w(void *_1) { return __lsx_vldrepl_w(_1, 4); } ++// CHECK-LABEL: @vldrepl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr [[_1:%.*]], i32 8) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vldrepl_d(void *_1) { return __lsx_vldrepl_d(_1, 8); } ++// CHECK-LABEL: @vmskgez_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> 
@llvm.loongarch.lsx.vmskgez.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmskgez_b(v16i8 _1) { return __lsx_vmskgez_b(_1); } ++// CHECK-LABEL: @vmsknz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmsknz_b(v16i8 _1) { return __lsx_vmsknz_b(_1); } ++// CHECK-LABEL: @vexth_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vexth_h_b(v16i8 _1) { return __lsx_vexth_h_b(_1); } ++// CHECK-LABEL: @vexth_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vexth_w_h(v8i16 _1) { return __lsx_vexth_w_h(_1); } ++// CHECK-LABEL: @vexth_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vexth_d_w(v4i32 _1) { return __lsx_vexth_d_w(_1); } ++// CHECK-LABEL: @vexth_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vexth_q_d(v2i64 _1) { return __lsx_vexth_q_d(_1); } ++// CHECK-LABEL: @vexth_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vexth_hu_bu(v16u8 _1) { return __lsx_vexth_hu_bu(_1); } ++// CHECK-LABEL: @vexth_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vexth_wu_hu(v8u16 _1) { return __lsx_vexth_wu_hu(_1); } ++// CHECK-LABEL: @vexth_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vexth_du_wu(v4u32 _1) { return __lsx_vexth_du_wu(_1); } ++// CHECK-LABEL: @vexth_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vexth_qu_du(v2u64 _1) { return __lsx_vexth_qu_du(_1); } ++// CHECK-LABEL: @vrotri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vrotri_b(v16i8 _1) { return __lsx_vrotri_b(_1, 1); } ++// CHECK-LABEL: @vrotri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vrotri_h(v8i16 _1) { return __lsx_vrotri_h(_1, 1); } ++// CHECK-LABEL: @vrotri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vrotri_w(v4i32 _1) { return __lsx_vrotri_w(_1, 1); } ++// CHECK-LABEL: @vrotri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> 
[[TMP0]] ++// ++v2i64 vrotri_d(v2i64 _1) { return __lsx_vrotri_d(_1, 1); } ++// CHECK-LABEL: @vextl_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vextl_q_d(v2i64 _1) { return __lsx_vextl_q_d(_1); } ++// CHECK-LABEL: @vsrlni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrlni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @vsrlni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrlni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @vsrlni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrlni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @vsrlni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrlni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @vsrlrni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrlrni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @vsrlrni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrlrni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @vsrlrni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrlrni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @vsrlrni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrlrni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @vssrlni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrlni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @vssrlni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrlni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @vssrlni_w_d( ++// CHECK-NEXT: entry: ++// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrlni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @vssrlni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrlni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @vssrlni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2) { return __lsx_vssrlni_bu_h(_1, _2, 1); } ++// CHECK-LABEL: @vssrlni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2) { return __lsx_vssrlni_hu_w(_1, _2, 1); } ++// CHECK-LABEL: @vssrlni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2) { return __lsx_vssrlni_wu_d(_1, _2, 1); } ++// CHECK-LABEL: @vssrlni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2) { return __lsx_vssrlni_du_q(_1, _2, 1); } ++// CHECK-LABEL: @vssrlrni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrlrni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @vssrlrni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrlrni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @vssrlrni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrlrni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @vssrlrni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrlrni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @vssrlrni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2) { ++ return __lsx_vssrlrni_bu_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlrni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2) { ++ return __lsx_vssrlrni_hu_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlrni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2) { ++ return __lsx_vssrlrni_wu_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlrni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2) { ++ return __lsx_vssrlrni_du_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrani_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrani_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrani_b_h(_1, _2, 1); } ++// CHECK-LABEL: @vsrani_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrani_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrani_h_w(_1, _2, 1); } ++// CHECK-LABEL: @vsrani_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrani_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrani_w_d(_1, _2, 1); } ++// CHECK-LABEL: @vsrani_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrani_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrani_d_q(_1, _2, 1); } ++// CHECK-LABEL: @vsrarni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrarni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @vsrarni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrarni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @vsrarni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrarni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @vsrarni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrarni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @vssrani_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> 
@llvm.loongarch.lsx.vssrani.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssrani_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrani_b_h(_1, _2, 1); } ++// CHECK-LABEL: @vssrani_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vssrani_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrani_h_w(_1, _2, 1); } ++// CHECK-LABEL: @vssrani_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssrani_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrani_w_d(_1, _2, 1); } ++// CHECK-LABEL: @vssrani_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vssrani_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrani_d_q(_1, _2, 1); } ++// CHECK-LABEL: @vssrani_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2) { return __lsx_vssrani_bu_h(_1, _2, 1); } ++// CHECK-LABEL: @vssrani_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2) { return __lsx_vssrani_hu_w(_1, _2, 1); } ++// CHECK-LABEL: @vssrani_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2) { return __lsx_vssrani_wu_d(_1, _2, 1); } ++// CHECK-LABEL: @vssrani_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vssrani_du_q(v2u64 _1, v2i64 _2) { return __lsx_vssrani_du_q(_1, _2, 1); } ++// CHECK-LABEL: @vssrarni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrarni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @vssrarni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrarni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @vssrarni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrarni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @vssrarni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x 
i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrarni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @vssrarni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2) { ++ return __lsx_vssrarni_bu_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrarni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2) { ++ return __lsx_vssrarni_hu_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrarni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2) { ++ return __lsx_vssrarni_wu_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrarni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2) { ++ return __lsx_vssrarni_du_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vpermi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vpermi_w(v4i32 _1, v4i32 _2) { return __lsx_vpermi_w(_1, _2, 1); } ++// CHECK-LABEL: @vld( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vld(ptr [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vld(void *_1) { return __lsx_vld(_1, 1); } ++// CHECK-LABEL: @vst( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vst(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret void ++// ++void vst(v16i8 _1, void *_2) { return __lsx_vst(_1, _2, 1); } ++// CHECK-LABEL: @vssrlrn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssrlrn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrlrn_b_h(_1, _2); } ++// CHECK-LABEL: @vssrlrn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vssrlrn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrlrn_h_w(_1, _2); } ++// CHECK-LABEL: @vssrlrn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssrlrn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrlrn_w_d(_1, _2); } ++// CHECK-LABEL: @vssrln_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssrln_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrln_b_h(_1, _2); } ++// CHECK-LABEL: @vssrln_h_w( ++// 
CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vssrln_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrln_h_w(_1, _2); } ++// CHECK-LABEL: @vssrln_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssrln_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrln_w_d(_1, _2); } ++// CHECK-LABEL: @vorn_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vorn_v(v16i8 _1, v16i8 _2) { return __lsx_vorn_v(_1, _2); } ++// CHECK-LABEL: @vldi( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldi(i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vldi() { return __lsx_vldi(1); } ++// CHECK-LABEL: @vshuf_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) { ++ return __lsx_vshuf_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vldx( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldx(ptr [[_1:%.*]], i64 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vldx(void *_1) { return __lsx_vldx(_1, 1); } ++// CHECK-LABEL: @vstx( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstx(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i64 1) ++// CHECK-NEXT: ret void ++// ++void vstx(v16i8 _1, void *_2) { return __lsx_vstx(_1, _2, 1); } ++// CHECK-LABEL: @vextl_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vextl_qu_du(v2u64 _1) { return __lsx_vextl_qu_du(_1); } ++// CHECK-LABEL: @bnz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bnz_b(v16u8 _1) { return __lsx_bnz_b(_1); } ++// CHECK-LABEL: @bnz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bnz_d(v2u64 _1) { return __lsx_bnz_d(_1); } ++// CHECK-LABEL: @bnz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bnz_h(v8u16 _1) { return __lsx_bnz_h(_1); } ++// CHECK-LABEL: @bnz_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bnz_v(v16u8 _1) { return __lsx_bnz_v(_1); } ++// CHECK-LABEL: @bnz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bnz_w(v4u32 _1) { return __lsx_bnz_w(_1); } ++// CHECK-LABEL: @bz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bz_b(v16u8 _1) { return 
__lsx_bz_b(_1); } ++// CHECK-LABEL: @bz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bz_d(v2u64 _1) { return __lsx_bz_d(_1); } ++// CHECK-LABEL: @bz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bz_h(v8u16 _1) { return __lsx_bz_h(_1); } ++// CHECK-LABEL: @bz_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bz_v(v16u8 _1) { return __lsx_bz_v(_1); } ++// CHECK-LABEL: @bz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bz_w(v4u32 _1) { return __lsx_bz_w(_1); } ++// CHECK-LABEL: @vfcmp_caf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_caf_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_caf_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_caf_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_caf_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_caf_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_ceq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_ceq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_ceq_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_ceq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_ceq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_ceq_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_cle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_cle_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cle_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_cle_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_cle_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cle_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_clt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_clt_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_clt_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_clt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_clt_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_clt_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_cne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> 
@llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_cne_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cne_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_cne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_cne_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cne_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_cor_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_cor_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cor_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_cor_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_cor_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cor_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_cueq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_cueq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cueq_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_cueq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_cueq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cueq_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_cule_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_cule_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cule_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_cule_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_cule_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cule_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_cult_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_cult_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cult_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_cult_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_cult_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cult_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_cun_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_cun_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cun_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_cune_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// 
CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_cune_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cune_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_cune_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_cune_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cune_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_cun_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_cun_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cun_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_saf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_saf_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_saf_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_saf_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_saf_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_saf_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_seq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_seq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_seq_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_seq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_seq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_seq_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_sle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_sle_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sle_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_sle_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_sle_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sle_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_slt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_slt_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_slt_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_slt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_slt_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_slt_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_sne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_sne_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sne_d(_1, _2); } 
++// CHECK-LABEL: @vfcmp_sne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_sne_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sne_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_sor_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_sor_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sor_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_sor_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_sor_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sor_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_sueq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_sueq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sueq_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_sueq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_sueq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sueq_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_sule_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_sule_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sule_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_sule_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_sule_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sule_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_sult_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_sult_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sult_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_sult_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_sult_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sult_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_sun_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_sun_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sun_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_sune_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_sune_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sune_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_sune_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail 
call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_sune_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sune_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_sun_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_sun_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sun_s(_1, _2); } ++// CHECK-LABEL: @vrepli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vrepli_b() { return __lsx_vrepli_b(1); } ++// CHECK-LABEL: @vrepli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vrepli_d() { return __lsx_vrepli_d(1); } ++// CHECK-LABEL: @vrepli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vrepli_h() { return __lsx_vrepli_h(1); } ++// CHECK-LABEL: @vrepli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vrepli_w() { return __lsx_vrepli_w(1); } +diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-error.c b/clang/test/CodeGen/LoongArch/lsx/builtin-error.c +new file mode 100644 +index 000000000000..3fc5f73f1193 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/lsx/builtin-error.c +@@ -0,0 +1,1382 @@ ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -verify %s ++ ++typedef signed char v16i8 __attribute__((vector_size(16), aligned(16))); ++typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1))); ++typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1))); ++typedef short v8i16 __attribute__((vector_size(16), aligned(16))); ++typedef short v8i16_h __attribute__((vector_size(16), aligned(2))); ++typedef unsigned short v8u16 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2))); ++typedef int v4i32 __attribute__((vector_size(16), aligned(16))); ++typedef int v4i32_w __attribute__((vector_size(16), aligned(4))); ++typedef unsigned int v4u32 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4))); ++typedef long long v2i64 __attribute__((vector_size(16), aligned(16))); ++typedef long long v2i64_d __attribute__((vector_size(16), aligned(8))); ++typedef unsigned long long v2u64 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8))); ++typedef float v4f32 __attribute__((vector_size(16), aligned(16))); ++typedef float v4f32_w __attribute__((vector_size(16), aligned(4))); ++typedef double v2f64 __attribute__((vector_size(16), aligned(16))); ++typedef double v2f64_d __attribute__((vector_size(16), aligned(8))); ++ ++typedef long long __m128i __attribute__((__vector_size__(16), __may_alias__)); ++typedef float __m128 __attribute__((__vector_size__(16), __may_alias__)); ++typedef double __m128d __attribute__((__vector_size__(16), __may_alias__)); ++ ++v16i8 vslli_b(v16i8 
_1, int var) { ++ v16i8 res = __builtin_lsx_vslli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vslli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vslli_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vslli_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vslli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vslli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vslli_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vslli_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vslli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslli_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vslli_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vslli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vslli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vslli_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrai_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vsrai_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vsrai_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vsrai_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrai_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vsrai_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsrai_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsrai_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrai_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vsrai_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsrai_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsrai_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrai_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vsrai_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsrai_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsrai_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrari_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vsrari_b(_1, -1); // expected-error {{argument value 4294967295 is 
outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vsrari_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vsrari_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrari_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vsrari_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsrari_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsrari_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrari_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vsrari_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsrari_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsrari_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrari_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vsrari_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsrari_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsrari_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrli_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vsrli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vsrli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vsrli_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrli_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vsrli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsrli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsrli_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrli_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vsrli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsrli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsrli_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrli_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vsrli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsrli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsrli_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrlri_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vsrlri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vsrlri_b(_1, 8); // expected-error 
{{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vsrlri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrlri_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vsrlri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsrlri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsrlri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrlri_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vsrlri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsrlri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsrlri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrlri_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vsrlri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsrlri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsrlri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vbitclri_b(v16u8 _1, int var) { ++ v16u8 res = __builtin_lsx_vbitclri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vbitclri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vbitclri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_b' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vbitclri_h(v8u16 _1, int var) { ++ v8u16 res = __builtin_lsx_vbitclri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vbitclri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vbitclri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_h' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vbitclri_w(v4u32 _1, int var) { ++ v4u32 res = __builtin_lsx_vbitclri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vbitclri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vbitclri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_w' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vbitclri_d(v2u64 _1, int var) { ++ v2u64 res = __builtin_lsx_vbitclri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vbitclri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vbitclri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vbitseti_b(v16u8 _1, int var) { ++ v16u8 res = __builtin_lsx_vbitseti_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vbitseti_b(_1, 8); // expected-error {{argument value 8 is 
outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vbitseti_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_b' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vbitseti_h(v8u16 _1, int var) { ++ v8u16 res = __builtin_lsx_vbitseti_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vbitseti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vbitseti_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_h' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vbitseti_w(v4u32 _1, int var) { ++ v4u32 res = __builtin_lsx_vbitseti_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vbitseti_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vbitseti_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_w' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vbitseti_d(v2u64 _1, int var) { ++ v2u64 res = __builtin_lsx_vbitseti_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vbitseti_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vbitseti_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vbitrevi_b(v16u8 _1, int var) { ++ v16u8 res = __builtin_lsx_vbitrevi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vbitrevi_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vbitrevi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_b' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vbitrevi_h(v8u16 _1, int var) { ++ v8u16 res = __builtin_lsx_vbitrevi_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vbitrevi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vbitrevi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_h' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vbitrevi_w(v4u32 _1, int var) { ++ v4u32 res = __builtin_lsx_vbitrevi_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vbitrevi_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vbitrevi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_w' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vbitrevi_d(v2u64 _1, int var) { ++ v2u64 res = __builtin_lsx_vbitrevi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vbitrevi_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vbitrevi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vaddi_bu(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vaddi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vaddi_bu(_1, 32); // expected-error {{argument value 32 
is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vaddi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_bu' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vaddi_hu(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vaddi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vaddi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vaddi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_hu' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vaddi_wu(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vaddi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vaddi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vaddi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_wu' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vaddi_du(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vaddi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vaddi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vaddi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsubi_bu(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vsubi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsubi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsubi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_bu' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsubi_hu(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vsubi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsubi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsubi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_hu' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsubi_wu(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vsubi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsubi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsubi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_wu' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsubi_du(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vsubi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsubi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsubi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vmaxi_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vmaxi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vmaxi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= 
__builtin_lsx_vmaxi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vmaxi_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vmaxi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vmaxi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vmaxi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vmaxi_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vmaxi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vmaxi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vmaxi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vmaxi_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vmaxi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vmaxi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vmaxi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vmaxi_bu(v16u8 _1, int var) { ++ v16u8 res = __builtin_lsx_vmaxi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vmaxi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vmaxi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_bu' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vmaxi_hu(v8u16 _1, int var) { ++ v8u16 res = __builtin_lsx_vmaxi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vmaxi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vmaxi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_hu' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vmaxi_wu(v4u32 _1, int var) { ++ v4u32 res = __builtin_lsx_vmaxi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vmaxi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vmaxi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_wu' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vmaxi_du(v2u64 _1, int var) { ++ v2u64 res = __builtin_lsx_vmaxi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vmaxi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vmaxi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vmini_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vmini_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vmini_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vmini_b(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_b' must 
be a constant integer}} ++ return res; ++} ++ ++v8i16 vmini_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vmini_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vmini_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vmini_h(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vmini_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vmini_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vmini_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vmini_w(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vmini_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vmini_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vmini_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vmini_d(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vmini_bu(v16u8 _1, int var) { ++ v16u8 res = __builtin_lsx_vmini_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vmini_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vmini_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_bu' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vmini_hu(v8u16 _1, int var) { ++ v8u16 res = __builtin_lsx_vmini_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vmini_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vmini_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_hu' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vmini_wu(v4u32 _1, int var) { ++ v4u32 res = __builtin_lsx_vmini_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vmini_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vmini_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_wu' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vmini_du(v2u64 _1, int var) { ++ v2u64 res = __builtin_lsx_vmini_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vmini_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vmini_du(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vseqi_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vseqi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vseqi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vseqi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vseqi_h(v8i16 _1, int var) { ++ v8i16 
res = __builtin_lsx_vseqi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vseqi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vseqi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vseqi_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vseqi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vseqi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vseqi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vseqi_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vseqi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vseqi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vseqi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vslti_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vslti_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vslti_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vslti_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vslti_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vslti_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vslti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vslti_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vslti_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vslti_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vslti_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vslti_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vslti_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vslti_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vslti_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vslti_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vslti_bu(v16u8 _1, int var) { ++ v16i8 res = __builtin_lsx_vslti_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslti_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslti_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_bu' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vslti_hu(v8u16 _1, int var) { ++ v8i16 res = __builtin_lsx_vslti_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 
31]}} ++ res |= __builtin_lsx_vslti_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslti_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_hu' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vslti_wu(v4u32 _1, int var) { ++ v4i32 res = __builtin_lsx_vslti_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslti_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslti_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_wu' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vslti_du(v2u64 _1, int var) { ++ v2i64 res = __builtin_lsx_vslti_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslti_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslti_du(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vslei_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vslei_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vslei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vslei_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vslei_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vslei_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vslei_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vslei_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vslei_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vslei_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vslei_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vslei_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vslei_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vslei_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vslei_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vslei_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vslei_bu(v16u8 _1, int var) { ++ v16i8 res = __builtin_lsx_vslei_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslei_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslei_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_bu' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vslei_hu(v8u16 _1, int var) { ++ v8i16 res = __builtin_lsx_vslei_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslei_hu(_1, 32); // expected-error {{argument value 32 is 
outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslei_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_hu' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vslei_wu(v4u32 _1, int var) { ++ v4i32 res = __builtin_lsx_vslei_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslei_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslei_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_wu' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vslei_du(v2u64 _1, int var) { ++ v2i64 res = __builtin_lsx_vslei_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslei_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslei_du(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsat_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vsat_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vsat_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vsat_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsat_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vsat_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsat_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsat_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsat_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vsat_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsat_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsat_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsat_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vsat_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsat_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsat_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vsat_bu(v16u8 _1, int var) { ++ v16u8 res = __builtin_lsx_vsat_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vsat_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vsat_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_bu' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vsat_hu(v8u16 _1, int var) { ++ v8u16 res = __builtin_lsx_vsat_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsat_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsat_hu(_1, var); // expected-error {{argument to 
'__builtin_lsx_vsat_hu' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vsat_wu(v4u32 _1, int var) { ++ v4u32 res = __builtin_lsx_vsat_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsat_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsat_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_wu' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vsat_du(v2u64 _1, int var) { ++ v2u64 res = __builtin_lsx_vsat_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsat_du(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsat_du(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vreplvei_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vreplvei_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vreplvei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vreplvei_b(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vreplvei_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vreplvei_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vreplvei_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vreplvei_h(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vreplvei_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vreplvei_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __builtin_lsx_vreplvei_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __builtin_lsx_vreplvei_w(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vreplvei_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vreplvei_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} ++ res |= __builtin_lsx_vreplvei_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ res |= __builtin_lsx_vreplvei_d(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vandi_b(v16u8 _1, int var) { ++ v16u8 res = __builtin_lsx_vandi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vandi_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vandi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vandi_b' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vori_b(v16u8 _1, int var) { ++ v16u8 res = __builtin_lsx_vori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vori_b' must be a constant integer}} ++ 
return res; ++} ++ ++v16u8 vnori_b(v16u8 _1, int var) { ++ v16u8 res = __builtin_lsx_vnori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vnori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vnori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vnori_b' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vxori_b(v16u8 _1, int var) { ++ v16u8 res = __builtin_lsx_vxori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vxori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vxori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vxori_b' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vbitseli_b(v16u8 _1, v16u8 _2, int var) { ++ v16u8 res = __builtin_lsx_vbitseli_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vbitseli_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vbitseli_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vbitseli_b' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vshuf4i_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vshuf4i_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vshuf4i_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vshuf4i_b(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vshuf4i_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vshuf4i_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vshuf4i_h(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vshuf4i_h(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vshuf4i_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vshuf4i_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vshuf4i_w(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vshuf4i_w(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_w' must be a constant integer}} ++ return res; ++} ++ ++int vpickve2gr_b(v16i8 _1, int var) { ++ int res = __builtin_lsx_vpickve2gr_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vpickve2gr_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vpickve2gr_b(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_b' must be a constant integer}} ++ return res; ++} ++ ++int vpickve2gr_h(v8i16 _1, int var) { ++ int res = __builtin_lsx_vpickve2gr_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vpickve2gr_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vpickve2gr_h(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_h' must be a 
constant integer}} ++ return res; ++} ++ ++int vpickve2gr_w(v4i32 _1, int var) { ++ int res = __builtin_lsx_vpickve2gr_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __builtin_lsx_vpickve2gr_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __builtin_lsx_vpickve2gr_w(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_w' must be a constant integer}} ++ return res; ++} ++ ++long vpickve2gr_d(v2i64 _1, int var) { ++ long res = __builtin_lsx_vpickve2gr_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} ++ res |= __builtin_lsx_vpickve2gr_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ res |= __builtin_lsx_vpickve2gr_d(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_d' must be a constant integer}} ++ return res; ++} ++ ++unsigned int vpickve2gr_bu(v16i8 _1, int var) { ++ unsigned int res = __builtin_lsx_vpickve2gr_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vpickve2gr_bu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vpickve2gr_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_bu' must be a constant integer}} ++ return res; ++} ++ ++unsigned int vpickve2gr_hu(v8i16 _1, int var) { ++ unsigned int res = __builtin_lsx_vpickve2gr_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vpickve2gr_hu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vpickve2gr_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_hu' must be a constant integer}} ++ return res; ++} ++ ++unsigned int vpickve2gr_wu(v4i32 _1, int var) { ++ unsigned int res = __builtin_lsx_vpickve2gr_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __builtin_lsx_vpickve2gr_wu(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __builtin_lsx_vpickve2gr_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_wu' must be a constant integer}} ++ return res; ++} ++ ++unsigned long int vpickve2gr_du(v2i64 _1, int var) { ++ unsigned long int res = __builtin_lsx_vpickve2gr_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} ++ res |= __builtin_lsx_vpickve2gr_du(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ res |= __builtin_lsx_vpickve2gr_du(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vinsgr2vr_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vinsgr2vr_b(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vinsgr2vr_b(_1, 1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vinsgr2vr_b(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vinsgr2vr_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vinsgr2vr_h(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vinsgr2vr_h(_1, 1, 8); // expected-error {{argument value 
8 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vinsgr2vr_h(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vinsgr2vr_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vinsgr2vr_w(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __builtin_lsx_vinsgr2vr_w(_1, 1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __builtin_lsx_vinsgr2vr_w(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vinsgr2vr_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vinsgr2vr_d(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} ++ res |= __builtin_lsx_vinsgr2vr_d(_1, 1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ res |= __builtin_lsx_vinsgr2vr_d(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_d' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsllwil_h_b(v16i8 _1, int var) { ++ v8i16 res = __builtin_lsx_vsllwil_h_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vsllwil_h_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vsllwil_h_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_h_b' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsllwil_w_h(v8i16 _1, int var) { ++ v4i32 res = __builtin_lsx_vsllwil_w_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsllwil_w_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsllwil_w_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_w_h' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsllwil_d_w(v4i32 _1, int var) { ++ v2i64 res = __builtin_lsx_vsllwil_d_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsllwil_d_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsllwil_d_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_d_w' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vsllwil_hu_bu(v16u8 _1, int var) { ++ v8u16 res = __builtin_lsx_vsllwil_hu_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vsllwil_hu_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vsllwil_hu_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_hu_bu' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vsllwil_wu_hu(v8u16 _1, int var) { ++ v4u32 res = __builtin_lsx_vsllwil_wu_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsllwil_wu_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsllwil_wu_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_wu_hu' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vsllwil_du_wu(v4u32 _1, int var) { ++ v2u64 res = __builtin_lsx_vsllwil_du_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} 
++ res |= __builtin_lsx_vsllwil_du_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsllwil_du_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_du_wu' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vfrstpi_b(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __builtin_lsx_vfrstpi_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vfrstpi_b(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vfrstpi_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vfrstpi_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vfrstpi_h(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __builtin_lsx_vfrstpi_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vfrstpi_h(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vfrstpi_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vfrstpi_h' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vshuf4i_d(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __builtin_lsx_vshuf4i_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vshuf4i_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vshuf4i_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vbsrl_v(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vbsrl_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vbsrl_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vbsrl_v(_1, var); // expected-error {{argument to '__builtin_lsx_vbsrl_v' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vbsll_v(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vbsll_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vbsll_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vbsll_v(_1, var); // expected-error {{argument to '__builtin_lsx_vbsll_v' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vextrins_b(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __builtin_lsx_vextrins_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vextrins_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vextrins_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vextrins_h(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __builtin_lsx_vextrins_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vextrins_h(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vextrins_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vextrins_w(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = 
__builtin_lsx_vextrins_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vextrins_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vextrins_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vextrins_d(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __builtin_lsx_vextrins_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vextrins_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vextrins_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_d' must be a constant integer}} ++ return res; ++} ++ ++void vstelm_b_idx(v16i8 _1, void *_2, int var) { ++ __builtin_lsx_vstelm_b(_1, _2, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ __builtin_lsx_vstelm_b(_1, _2, 1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ __builtin_lsx_vstelm_b(_1, _2, 1, var); // expected-error {{argument to '__builtin_lsx_vstelm_b' must be a constant integer}} ++} ++ ++void vstelm_h_idx(v8i16 _1, void *_2, int var) { ++ __builtin_lsx_vstelm_h(_1, _2, 2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ __builtin_lsx_vstelm_h(_1, _2, 2, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ __builtin_lsx_vstelm_h(_1, _2, 2, var); // expected-error {{argument to '__builtin_lsx_vstelm_h' must be a constant integer}} ++} ++ ++void vstelm_w_idx(v4i32 _1, void *_2, int var) { ++ __builtin_lsx_vstelm_w(_1, _2, 4, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ __builtin_lsx_vstelm_w(_1, _2, 4, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ __builtin_lsx_vstelm_w(_1, _2, 4, var); // expected-error {{argument to '__builtin_lsx_vstelm_w' must be a constant integer}} ++} ++ ++void vstelm_d_idx(v2i64 _1, void *_2, int var) { ++ __builtin_lsx_vstelm_d(_1, _2, 8, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} ++ __builtin_lsx_vstelm_d(_1, _2, 8, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ __builtin_lsx_vstelm_d(_1, _2, 8, var); // expected-error {{argument to '__builtin_lsx_vstelm_d' must be a constant integer}} ++} ++ ++void vstelm_b(v16i8 _1, void *_2, int var) { ++ __builtin_lsx_vstelm_b(_1, _2, -129, 1); // expected-error {{argument value -129 is outside the valid range [-128, 127]}} ++ __builtin_lsx_vstelm_b(_1, _2, 128, 1); // expected-error {{argument value 128 is outside the valid range [-128, 127]}} ++ __builtin_lsx_vstelm_b(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_b' must be a constant integer}} ++} ++ ++void vstelm_h(v8i16 _1, void *_2, int var) { ++ __builtin_lsx_vstelm_h(_1, _2, -258, 1); // expected-error {{argument value -258 is outside the valid range [-256, 254]}} ++ __builtin_lsx_vstelm_h(_1, _2, 256, 1); // expected-error {{argument value 256 is outside the valid range [-256, 254]}} ++ __builtin_lsx_vstelm_h(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_h' must be a constant integer}} ++} ++ ++void vstelm_w(v4i32 _1, void *_2, int var) { ++ __builtin_lsx_vstelm_w(_1, _2, -516, 1); // expected-error 
{{argument value -516 is outside the valid range [-512, 508]}} ++ __builtin_lsx_vstelm_w(_1, _2, 512, 1); // expected-error {{argument value 512 is outside the valid range [-512, 508]}} ++ __builtin_lsx_vstelm_w(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_w' must be a constant integer}} ++} ++ ++void vstelm_d(v2i64 _1, void *_2, int var) { ++ __builtin_lsx_vstelm_d(_1, _2, -1032, 1); // expected-error {{argument value -1032 is outside the valid range [-1024, 1016]}} ++ __builtin_lsx_vstelm_d(_1, _2, 1024, 1); // expected-error {{argument value 1024 is outside the valid range [-1024, 1016]}} ++ __builtin_lsx_vstelm_d(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_d' must be a constant integer}} ++} ++ ++v16i8 vldrepl_b(void *_1, int var) { ++ v16i8 res = __builtin_lsx_vldrepl_b(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ res |= __builtin_lsx_vldrepl_b(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} ++ res |= __builtin_lsx_vldrepl_b(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vldrepl_h(void *_1, int var) { ++ v8i16 res = __builtin_lsx_vldrepl_h(_1, -2050); // expected-error {{argument value -2050 is outside the valid range [-2048, 2046]}} ++ res |= __builtin_lsx_vldrepl_h(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2046]}} ++ res |= __builtin_lsx_vldrepl_h(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vldrepl_w(void *_1, int var) { ++ v4i32 res = __builtin_lsx_vldrepl_w(_1, -2052); // expected-error {{argument value -2052 is outside the valid range [-2048, 2044]}} ++ res |= __builtin_lsx_vldrepl_w(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2044]}} ++ res |= __builtin_lsx_vldrepl_w(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vldrepl_d(void *_1, int var) { ++ v2i64 res = __builtin_lsx_vldrepl_d(_1, -2056); // expected-error {{argument value -2056 is outside the valid range [-2048, 2040]}} ++ res |= __builtin_lsx_vldrepl_d(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2040]}} ++ res |= __builtin_lsx_vldrepl_d(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vrotri_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vrotri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vrotri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vrotri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vrotri_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vrotri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vrotri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vrotri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vrotri_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vrotri_w(_1, -1); // expected-error 
{{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vrotri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vrotri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vrotri_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vrotri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vrotri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vrotri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __builtin_lsx_vsrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __builtin_lsx_vsrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __builtin_lsx_vsrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __builtin_lsx_vsrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vsrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vsrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __builtin_lsx_vsrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __builtin_lsx_vsrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_h_w' must be a 
constant integer}} ++ return res; ++} ++ ++v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __builtin_lsx_vsrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __builtin_lsx_vsrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vsrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vsrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __builtin_lsx_vssrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vssrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vssrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __builtin_lsx_vssrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vssrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vssrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __builtin_lsx_vssrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vssrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vssrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __builtin_lsx_vssrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vssrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vssrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2, int var) { ++ v16u8 res = __builtin_lsx_vssrlni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vssrlni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vssrlni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2, int var) { ++ v8u16 res = __builtin_lsx_vssrlni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} 
++ res |= __builtin_lsx_vssrlni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vssrlni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2, int var) { ++ v4u32 res = __builtin_lsx_vssrlni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vssrlni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vssrlni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2, int var) { ++ v2u64 res = __builtin_lsx_vssrlni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vssrlni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vssrlni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_du_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __builtin_lsx_vssrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vssrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vssrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __builtin_lsx_vssrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vssrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vssrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __builtin_lsx_vssrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vssrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vssrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __builtin_lsx_vssrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vssrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vssrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2, int var) { ++ v16u8 res = __builtin_lsx_vssrlrni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vssrlrni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vssrlrni_bu_h(_1, _2, var); // expected-error 
{{argument to '__builtin_lsx_vssrlrni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2, int var) { ++ v8u16 res = __builtin_lsx_vssrlrni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vssrlrni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vssrlrni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2, int var) { ++ v4u32 res = __builtin_lsx_vssrlrni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vssrlrni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vssrlrni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2, int var) { ++ v2u64 res = __builtin_lsx_vssrlrni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vssrlrni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vssrlrni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_du_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrani_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __builtin_lsx_vsrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrani_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __builtin_lsx_vsrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrani_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __builtin_lsx_vsrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrani_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __builtin_lsx_vsrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vsrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vsrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __builtin_lsx_vsrarni_b_h(_1, _2, -1); // expected-error 
{{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __builtin_lsx_vsrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __builtin_lsx_vsrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __builtin_lsx_vsrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vsrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vsrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vssrani_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __builtin_lsx_vssrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vssrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vssrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vssrani_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __builtin_lsx_vssrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vssrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vssrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vssrani_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __builtin_lsx_vssrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vssrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vssrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vssrani_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __builtin_lsx_vssrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vssrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vssrani_d_q(_1, _2, 
var); // expected-error {{argument to '__builtin_lsx_vssrani_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2, int var) { ++ v16u8 res = __builtin_lsx_vssrani_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vssrani_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vssrani_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2, int var) { ++ v8u16 res = __builtin_lsx_vssrani_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vssrani_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vssrani_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2, int var) { ++ v4u32 res = __builtin_lsx_vssrani_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vssrani_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vssrani_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vssrani_du_q(v2u64 _1, v2i64 _2, int var) { ++ v2u64 res = __builtin_lsx_vssrani_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vssrani_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vssrani_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_du_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __builtin_lsx_vssrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vssrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vssrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __builtin_lsx_vssrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vssrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vssrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __builtin_lsx_vssrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vssrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vssrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = 
__builtin_lsx_vssrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vssrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vssrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2, int var) { ++ v16u8 res = __builtin_lsx_vssrarni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vssrarni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vssrarni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2, int var) { ++ v8u16 res = __builtin_lsx_vssrarni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vssrarni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vssrarni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2, int var) { ++ v4u32 res = __builtin_lsx_vssrarni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vssrarni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vssrarni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2, int var) { ++ v2u64 res = __builtin_lsx_vssrarni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vssrarni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vssrarni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_du_q' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vpermi_w(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __builtin_lsx_vpermi_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vpermi_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vpermi_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vpermi_w' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vld(void *_1, int var) { ++ v16i8 res = __builtin_lsx_vld(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ res |= __builtin_lsx_vld(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} ++ res |= __builtin_lsx_vld(_1, var); // expected-error {{argument to '__builtin_lsx_vld' must be a constant integer}} ++ return res; ++} ++ ++void vst(v16i8 _1, void *_2, int var) { ++ __builtin_lsx_vst(_1, _2, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ __builtin_lsx_vst(_1, _2, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} ++ __builtin_lsx_vst(_1, _2, var); 
// expected-error {{argument to '__builtin_lsx_vst' must be a constant integer}} ++} ++ ++v2i64 vldi(int var) { ++ v2i64 res = __builtin_lsx_vldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}} ++ res |= __builtin_lsx_vldi(4096); // expected-error {{argument value 4096 is outside the valid range [-4096, 4095]}} ++ res |= __builtin_lsx_vldi(var); // expected-error {{argument to '__builtin_lsx_vldi' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vrepli_b(int var) { ++ v16i8 res = __builtin_lsx_vrepli_b(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __builtin_lsx_vrepli_b(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __builtin_lsx_vrepli_b(var); // expected-error {{argument to '__builtin_lsx_vrepli_b' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vrepli_d(int var) { ++ v2i64 res = __builtin_lsx_vrepli_d(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __builtin_lsx_vrepli_d(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __builtin_lsx_vrepli_d(var); // expected-error {{argument to '__builtin_lsx_vrepli_d' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vrepli_h(int var) { ++ v8i16 res = __builtin_lsx_vrepli_h(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __builtin_lsx_vrepli_h(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __builtin_lsx_vrepli_h(var); // expected-error {{argument to '__builtin_lsx_vrepli_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vrepli_w(int var) { ++ v4i32 res = __builtin_lsx_vrepli_w(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __builtin_lsx_vrepli_w(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __builtin_lsx_vrepli_w(var); // expected-error {{argument to '__builtin_lsx_vrepli_w' must be a constant integer}} ++ return res; ++} +diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin.c b/clang/test/CodeGen/LoongArch/lsx/builtin.c +new file mode 100644 +index 000000000000..ef5a390e1838 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/lsx/builtin.c +@@ -0,0 +1,5193 @@ ++// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -O2 -emit-llvm %s -o - | FileCheck %s ++ ++typedef signed char v16i8 __attribute__ ((vector_size(16), aligned(16))); ++typedef signed char v16i8_b __attribute__ ((vector_size(16), aligned(1))); ++typedef unsigned char v16u8 __attribute__ ((vector_size(16), aligned(16))); ++typedef unsigned char v16u8_b __attribute__ ((vector_size(16), aligned(1))); ++typedef short v8i16 __attribute__ ((vector_size(16), aligned(16))); ++typedef short v8i16_h __attribute__ ((vector_size(16), aligned(2))); ++typedef unsigned short v8u16 __attribute__ ((vector_size(16), aligned(16))); ++typedef unsigned short v8u16_h __attribute__ ((vector_size(16), aligned(2))); ++typedef int v4i32 __attribute__ ((vector_size(16), aligned(16))); ++typedef int v4i32_w __attribute__ ((vector_size(16), aligned(4))); ++typedef unsigned int v4u32 __attribute__ ((vector_size(16), aligned(16))); ++typedef unsigned int v4u32_w __attribute__ ((vector_size(16), aligned(4))); ++typedef long long v2i64 __attribute__ 
((vector_size(16), aligned(16))); ++typedef long long v2i64_d __attribute__ ((vector_size(16), aligned(8))); ++typedef unsigned long long v2u64 __attribute__ ((vector_size(16), aligned(16))); ++typedef unsigned long long v2u64_d __attribute__ ((vector_size(16), aligned(8))); ++typedef float v4f32 __attribute__ ((vector_size(16), aligned(16))); ++typedef float v4f32_w __attribute__ ((vector_size(16), aligned(4))); ++typedef double v2f64 __attribute__ ((vector_size(16), aligned(16))); ++typedef double v2f64_d __attribute__ ((vector_size(16), aligned(8))); ++ ++typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); ++typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); ++typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__)); ++ ++ ++// CHECK-LABEL: @vsll_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsll_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsll_b(_1, _2); } ++// CHECK-LABEL: @vsll_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsll_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsll_h(_1, _2); } ++// CHECK-LABEL: @vsll_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsll_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsll_w(_1, _2); } ++// CHECK-LABEL: @vsll_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsll_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsll_d(_1, _2); } ++// CHECK-LABEL: @vslli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vslli_b(v16i8 _1) { return __builtin_lsx_vslli_b(_1, 1); } ++// CHECK-LABEL: @vslli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vslli_h(v8i16 _1) { return __builtin_lsx_vslli_h(_1, 1); } ++// CHECK-LABEL: @vslli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vslli_w(v4i32 _1) { return __builtin_lsx_vslli_w(_1, 1); } ++// CHECK-LABEL: @vslli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vslli_d(v2i64 _1) { return __builtin_lsx_vslli_d(_1, 1); } ++// CHECK-LABEL: @vsra_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsra_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsra_b(_1, _2); } ++// CHECK-LABEL: @vsra_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) 
++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsra_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsra_h(_1, _2); } ++// CHECK-LABEL: @vsra_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsra_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsra_w(_1, _2); } ++// CHECK-LABEL: @vsra_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsra_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsra_d(_1, _2); } ++// CHECK-LABEL: @vsrai_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrai_b(v16i8 _1) { return __builtin_lsx_vsrai_b(_1, 1); } ++// CHECK-LABEL: @vsrai_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrai_h(v8i16 _1) { return __builtin_lsx_vsrai_h(_1, 1); } ++// CHECK-LABEL: @vsrai_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrai_w(v4i32 _1) { return __builtin_lsx_vsrai_w(_1, 1); } ++// CHECK-LABEL: @vsrai_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrai_d(v2i64 _1) { return __builtin_lsx_vsrai_d(_1, 1); } ++// CHECK-LABEL: @vsrar_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrar_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vsrar_b(_1, _2); ++} ++// CHECK-LABEL: @vsrar_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrar_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsrar_h(_1, _2); ++} ++// CHECK-LABEL: @vsrar_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrar_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsrar_w(_1, _2); ++} ++// CHECK-LABEL: @vsrar_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrar_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsrar_d(_1, _2); ++} ++// CHECK-LABEL: @vsrari_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrari_b(v16i8 _1) { return __builtin_lsx_vsrari_b(_1, 1); } ++// CHECK-LABEL: @vsrari_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrari_h(v8i16 _1) { return 
__builtin_lsx_vsrari_h(_1, 1); } ++// CHECK-LABEL: @vsrari_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrari_w(v4i32 _1) { return __builtin_lsx_vsrari_w(_1, 1); } ++// CHECK-LABEL: @vsrari_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrari_d(v2i64 _1) { return __builtin_lsx_vsrari_d(_1, 1); } ++// CHECK-LABEL: @vsrl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrl_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsrl_b(_1, _2); } ++// CHECK-LABEL: @vsrl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrl_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsrl_h(_1, _2); } ++// CHECK-LABEL: @vsrl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrl_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsrl_w(_1, _2); } ++// CHECK-LABEL: @vsrl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrl_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsrl_d(_1, _2); } ++// CHECK-LABEL: @vsrli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrli_b(v16i8 _1) { return __builtin_lsx_vsrli_b(_1, 1); } ++// CHECK-LABEL: @vsrli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrli_h(v8i16 _1) { return __builtin_lsx_vsrli_h(_1, 1); } ++// CHECK-LABEL: @vsrli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrli_w(v4i32 _1) { return __builtin_lsx_vsrli_w(_1, 1); } ++// CHECK-LABEL: @vsrli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrli_d(v2i64 _1) { return __builtin_lsx_vsrli_d(_1, 1); } ++// CHECK-LABEL: @vsrlr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrlr_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vsrlr_b(_1, _2); ++} ++// CHECK-LABEL: @vsrlr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrlr_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsrlr_h(_1, _2); ++} ++// CHECK-LABEL: @vsrlr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = 
tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrlr_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsrlr_w(_1, _2); ++} ++// CHECK-LABEL: @vsrlr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrlr_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsrlr_d(_1, _2); ++} ++// CHECK-LABEL: @vsrlri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrlri_b(v16i8 _1) { return __builtin_lsx_vsrlri_b(_1, 1); } ++// CHECK-LABEL: @vsrlri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrlri_h(v8i16 _1) { return __builtin_lsx_vsrlri_h(_1, 1); } ++// CHECK-LABEL: @vsrlri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrlri_w(v4i32 _1) { return __builtin_lsx_vsrlri_w(_1, 1); } ++// CHECK-LABEL: @vsrlri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrlri_d(v2i64 _1) { return __builtin_lsx_vsrlri_d(_1, 1); } ++// CHECK-LABEL: @vbitclr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vbitclr_b(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vbitclr_b(_1, _2); ++} ++// CHECK-LABEL: @vbitclr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vbitclr_h(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vbitclr_h(_1, _2); ++} ++// CHECK-LABEL: @vbitclr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vbitclr_w(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vbitclr_w(_1, _2); ++} ++// CHECK-LABEL: @vbitclr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vbitclr_d(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vbitclr_d(_1, _2); ++} ++// CHECK-LABEL: @vbitclri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vbitclri_b(v16u8 _1) { return __builtin_lsx_vbitclri_b(_1, 1); } ++// CHECK-LABEL: @vbitclri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vbitclri_h(v8u16 _1) { return __builtin_lsx_vbitclri_h(_1, 1); } ++// CHECK-LABEL: @vbitclri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x 
i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vbitclri_w(v4u32 _1) { return __builtin_lsx_vbitclri_w(_1, 1); } ++// CHECK-LABEL: @vbitclri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vbitclri_d(v2u64 _1) { return __builtin_lsx_vbitclri_d(_1, 1); } ++// CHECK-LABEL: @vbitset_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vbitset_b(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vbitset_b(_1, _2); ++} ++// CHECK-LABEL: @vbitset_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vbitset_h(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vbitset_h(_1, _2); ++} ++// CHECK-LABEL: @vbitset_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vbitset_w(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vbitset_w(_1, _2); ++} ++// CHECK-LABEL: @vbitset_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vbitset_d(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vbitset_d(_1, _2); ++} ++// CHECK-LABEL: @vbitseti_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vbitseti_b(v16u8 _1) { return __builtin_lsx_vbitseti_b(_1, 1); } ++// CHECK-LABEL: @vbitseti_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vbitseti_h(v8u16 _1) { return __builtin_lsx_vbitseti_h(_1, 1); } ++// CHECK-LABEL: @vbitseti_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vbitseti_w(v4u32 _1) { return __builtin_lsx_vbitseti_w(_1, 1); } ++// CHECK-LABEL: @vbitseti_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vbitseti_d(v2u64 _1) { return __builtin_lsx_vbitseti_d(_1, 1); } ++// CHECK-LABEL: @vbitrev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vbitrev_b(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vbitrev_b(_1, _2); ++} ++// CHECK-LABEL: @vbitrev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vbitrev_h(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vbitrev_h(_1, _2); ++} ++// CHECK-LABEL: @vbitrev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vbitrev_w(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vbitrev_w(_1, _2); ++} ++// CHECK-LABEL: @vbitrev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vbitrev_d(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vbitrev_d(_1, _2); ++} ++// CHECK-LABEL: @vbitrevi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vbitrevi_b(v16u8 _1) { return __builtin_lsx_vbitrevi_b(_1, 1); } ++// CHECK-LABEL: @vbitrevi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vbitrevi_h(v8u16 _1) { return __builtin_lsx_vbitrevi_h(_1, 1); } ++// CHECK-LABEL: @vbitrevi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vbitrevi_w(v4u32 _1) { return __builtin_lsx_vbitrevi_w(_1, 1); } ++// CHECK-LABEL: @vbitrevi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vbitrevi_d(v2u64 _1) { return __builtin_lsx_vbitrevi_d(_1, 1); } ++// CHECK-LABEL: @vadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vadd_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vadd_b(_1, _2); } ++// CHECK-LABEL: @vadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vadd_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vadd_h(_1, _2); } ++// CHECK-LABEL: @vadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vadd_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vadd_w(_1, _2); } ++// CHECK-LABEL: @vadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vadd_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vadd_d(_1, _2); } ++// CHECK-LABEL: @vaddi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vaddi_bu(v16i8 _1) { return __builtin_lsx_vaddi_bu(_1, 1); } ++// CHECK-LABEL: @vaddi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vaddi_hu(v8i16 _1) { return __builtin_lsx_vaddi_hu(_1, 1); } ++// CHECK-LABEL: @vaddi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> 
@llvm.loongarch.lsx.vaddi.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vaddi_wu(v4i32 _1) { return __builtin_lsx_vaddi_wu(_1, 1); } ++// CHECK-LABEL: @vaddi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddi_du(v2i64 _1) { return __builtin_lsx_vaddi_du(_1, 1); } ++// CHECK-LABEL: @vsub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsub_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsub_b(_1, _2); } ++// CHECK-LABEL: @vsub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsub_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsub_h(_1, _2); } ++// CHECK-LABEL: @vsub_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsub_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsub_w(_1, _2); } ++// CHECK-LABEL: @vsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsub_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsub_d(_1, _2); } ++// CHECK-LABEL: @vsubi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsubi_bu(v16i8 _1) { return __builtin_lsx_vsubi_bu(_1, 1); } ++// CHECK-LABEL: @vsubi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsubi_hu(v8i16 _1) { return __builtin_lsx_vsubi_hu(_1, 1); } ++// CHECK-LABEL: @vsubi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsubi_wu(v4i32 _1) { return __builtin_lsx_vsubi_wu(_1, 1); } ++// CHECK-LABEL: @vsubi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubi_du(v2i64 _1) { return __builtin_lsx_vsubi_du(_1, 1); } ++// CHECK-LABEL: @vmax_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmax_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmax_b(_1, _2); } ++// CHECK-LABEL: @vmax_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmax_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmax_h(_1, _2); } ++// CHECK-LABEL: @vmax_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 
vmax_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmax_w(_1, _2); } ++// CHECK-LABEL: @vmax_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmax_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmax_d(_1, _2); } ++// CHECK-LABEL: @vmaxi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmaxi_b(v16i8 _1) { return __builtin_lsx_vmaxi_b(_1, 1); } ++// CHECK-LABEL: @vmaxi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmaxi_h(v8i16 _1) { return __builtin_lsx_vmaxi_h(_1, 1); } ++// CHECK-LABEL: @vmaxi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmaxi_w(v4i32 _1) { return __builtin_lsx_vmaxi_w(_1, 1); } ++// CHECK-LABEL: @vmaxi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaxi_d(v2i64 _1) { return __builtin_lsx_vmaxi_d(_1, 1); } ++// CHECK-LABEL: @vmax_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vmax_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vmax_bu(_1, _2); ++} ++// CHECK-LABEL: @vmax_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vmax_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vmax_hu(_1, _2); ++} ++// CHECK-LABEL: @vmax_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vmax_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vmax_wu(_1, _2); ++} ++// CHECK-LABEL: @vmax_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmax_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vmax_du(_1, _2); ++} ++// CHECK-LABEL: @vmaxi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vmaxi_bu(v16u8 _1) { return __builtin_lsx_vmaxi_bu(_1, 1); } ++// CHECK-LABEL: @vmaxi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vmaxi_hu(v8u16 _1) { return __builtin_lsx_vmaxi_hu(_1, 1); } ++// CHECK-LABEL: @vmaxi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vmaxi_wu(v4u32 _1) { return __builtin_lsx_vmaxi_wu(_1, 1); } ++// CHECK-LABEL: @vmaxi_du( ++// CHECK-NEXT: entry: 
++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmaxi_du(v2u64 _1) { return __builtin_lsx_vmaxi_du(_1, 1); } ++// CHECK-LABEL: @vmin_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmin_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmin_b(_1, _2); } ++// CHECK-LABEL: @vmin_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmin_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmin_h(_1, _2); } ++// CHECK-LABEL: @vmin_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmin_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmin_w(_1, _2); } ++// CHECK-LABEL: @vmin_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmin_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmin_d(_1, _2); } ++// CHECK-LABEL: @vmini_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmini_b(v16i8 _1) { return __builtin_lsx_vmini_b(_1, 1); } ++// CHECK-LABEL: @vmini_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmini_h(v8i16 _1) { return __builtin_lsx_vmini_h(_1, 1); } ++// CHECK-LABEL: @vmini_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmini_w(v4i32 _1) { return __builtin_lsx_vmini_w(_1, 1); } ++// CHECK-LABEL: @vmini_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmini_d(v2i64 _1) { return __builtin_lsx_vmini_d(_1, 1); } ++// CHECK-LABEL: @vmin_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vmin_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vmin_bu(_1, _2); ++} ++// CHECK-LABEL: @vmin_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vmin_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vmin_hu(_1, _2); ++} ++// CHECK-LABEL: @vmin_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vmin_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vmin_wu(_1, _2); ++} ++// CHECK-LABEL: @vmin_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> 
[[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmin_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vmin_du(_1, _2); ++} ++// CHECK-LABEL: @vmini_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vmini_bu(v16u8 _1) { return __builtin_lsx_vmini_bu(_1, 1); } ++// CHECK-LABEL: @vmini_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vmini_hu(v8u16 _1) { return __builtin_lsx_vmini_hu(_1, 1); } ++// CHECK-LABEL: @vmini_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vmini_wu(v4u32 _1) { return __builtin_lsx_vmini_wu(_1, 1); } ++// CHECK-LABEL: @vmini_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmini_du(v2u64 _1) { return __builtin_lsx_vmini_du(_1, 1); } ++// CHECK-LABEL: @vseq_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vseq_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vseq_b(_1, _2); } ++// CHECK-LABEL: @vseq_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vseq_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vseq_h(_1, _2); } ++// CHECK-LABEL: @vseq_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vseq_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vseq_w(_1, _2); } ++// CHECK-LABEL: @vseq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vseq_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vseq_d(_1, _2); } ++// CHECK-LABEL: @vseqi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vseqi_b(v16i8 _1) { return __builtin_lsx_vseqi_b(_1, 1); } ++// CHECK-LABEL: @vseqi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vseqi_h(v8i16 _1) { return __builtin_lsx_vseqi_h(_1, 1); } ++// CHECK-LABEL: @vseqi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vseqi_w(v4i32 _1) { return __builtin_lsx_vseqi_w(_1, 1); } ++// CHECK-LABEL: @vseqi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vseqi_d(v2i64 _1) { return __builtin_lsx_vseqi_d(_1, 1); } ++// 
CHECK-LABEL: @vslti_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vslti_b(v16i8 _1) { return __builtin_lsx_vslti_b(_1, 1); } ++// CHECK-LABEL: @vslt_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vslt_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vslt_b(_1, _2); } ++// CHECK-LABEL: @vslt_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vslt_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vslt_h(_1, _2); } ++// CHECK-LABEL: @vslt_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vslt_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vslt_w(_1, _2); } ++// CHECK-LABEL: @vslt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vslt_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vslt_d(_1, _2); } ++// CHECK-LABEL: @vslti_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vslti_h(v8i16 _1) { return __builtin_lsx_vslti_h(_1, 1); } ++// CHECK-LABEL: @vslti_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vslti_w(v4i32 _1) { return __builtin_lsx_vslti_w(_1, 1); } ++// CHECK-LABEL: @vslti_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vslti_d(v2i64 _1) { return __builtin_lsx_vslti_d(_1, 1); } ++// CHECK-LABEL: @vslt_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vslt_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vslt_bu(_1, _2); ++} ++// CHECK-LABEL: @vslt_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vslt_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vslt_hu(_1, _2); ++} ++// CHECK-LABEL: @vslt_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vslt_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vslt_wu(_1, _2); ++} ++// CHECK-LABEL: @vslt_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vslt_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vslt_du(_1, _2); ++} ++// CHECK-LABEL: @vslti_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vslti_bu(v16u8 _1) { return __builtin_lsx_vslti_bu(_1, 1); } ++// CHECK-LABEL: @vslti_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vslti_hu(v8u16 _1) { return __builtin_lsx_vslti_hu(_1, 1); } ++// CHECK-LABEL: @vslti_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vslti_wu(v4u32 _1) { return __builtin_lsx_vslti_wu(_1, 1); } ++// CHECK-LABEL: @vslti_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vslti_du(v2u64 _1) { return __builtin_lsx_vslti_du(_1, 1); } ++// CHECK-LABEL: @vsle_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsle_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsle_b(_1, _2); } ++// CHECK-LABEL: @vsle_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsle_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsle_h(_1, _2); } ++// CHECK-LABEL: @vsle_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsle_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsle_w(_1, _2); } ++// CHECK-LABEL: @vsle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsle_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsle_d(_1, _2); } ++// CHECK-LABEL: @vslei_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vslei_b(v16i8 _1) { return __builtin_lsx_vslei_b(_1, 1); } ++// CHECK-LABEL: @vslei_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vslei_h(v8i16 _1) { return __builtin_lsx_vslei_h(_1, 1); } ++// CHECK-LABEL: @vslei_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vslei_w(v4i32 _1) { return __builtin_lsx_vslei_w(_1, 1); } ++// CHECK-LABEL: @vslei_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vslei_d(v2i64 _1) { return __builtin_lsx_vslei_d(_1, 1); } ++// CHECK-LABEL: @vsle_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsle_bu(v16u8 
_1, v16u8 _2) { ++ return __builtin_lsx_vsle_bu(_1, _2); ++} ++// CHECK-LABEL: @vsle_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsle_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vsle_hu(_1, _2); ++} ++// CHECK-LABEL: @vsle_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsle_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vsle_wu(_1, _2); ++} ++// CHECK-LABEL: @vsle_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsle_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vsle_du(_1, _2); ++} ++// CHECK-LABEL: @vslei_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vslei_bu(v16u8 _1) { return __builtin_lsx_vslei_bu(_1, 1); } ++// CHECK-LABEL: @vslei_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vslei_hu(v8u16 _1) { return __builtin_lsx_vslei_hu(_1, 1); } ++// CHECK-LABEL: @vslei_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vslei_wu(v4u32 _1) { return __builtin_lsx_vslei_wu(_1, 1); } ++// CHECK-LABEL: @vslei_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vslei_du(v2u64 _1) { return __builtin_lsx_vslei_du(_1, 1); } ++// CHECK-LABEL: @vsat_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsat_b(v16i8 _1) { return __builtin_lsx_vsat_b(_1, 1); } ++// CHECK-LABEL: @vsat_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsat_h(v8i16 _1) { return __builtin_lsx_vsat_h(_1, 1); } ++// CHECK-LABEL: @vsat_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsat_w(v4i32 _1) { return __builtin_lsx_vsat_w(_1, 1); } ++// CHECK-LABEL: @vsat_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsat_d(v2i64 _1) { return __builtin_lsx_vsat_d(_1, 1); } ++// CHECK-LABEL: @vsat_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vsat_bu(v16u8 _1) { return __builtin_lsx_vsat_bu(_1, 1); } ++// CHECK-LABEL: @vsat_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> 
@llvm.loongarch.lsx.vsat.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vsat_hu(v8u16 _1) { return __builtin_lsx_vsat_hu(_1, 1); } ++// CHECK-LABEL: @vsat_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vsat_wu(v4u32 _1) { return __builtin_lsx_vsat_wu(_1, 1); } ++// CHECK-LABEL: @vsat_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vsat_du(v2u64 _1) { return __builtin_lsx_vsat_du(_1, 1); } ++// CHECK-LABEL: @vadda_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vadda_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vadda_b(_1, _2); ++} ++// CHECK-LABEL: @vadda_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vadda_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vadda_h(_1, _2); ++} ++// CHECK-LABEL: @vadda_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vadda_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vadda_w(_1, _2); ++} ++// CHECK-LABEL: @vadda_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vadda_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vadda_d(_1, _2); ++} ++// CHECK-LABEL: @vsadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsadd_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vsadd_b(_1, _2); ++} ++// CHECK-LABEL: @vsadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsadd_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsadd_h(_1, _2); ++} ++// CHECK-LABEL: @vsadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsadd_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsadd_w(_1, _2); ++} ++// CHECK-LABEL: @vsadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsadd_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsadd_d(_1, _2); ++} ++// CHECK-LABEL: @vsadd_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vsadd_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vsadd_bu(_1, _2); ++} ++// CHECK-LABEL: @vsadd_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x 
i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vsadd_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vsadd_hu(_1, _2); ++} ++// CHECK-LABEL: @vsadd_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vsadd_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vsadd_wu(_1, _2); ++} ++// CHECK-LABEL: @vsadd_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vsadd_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vsadd_du(_1, _2); ++} ++// CHECK-LABEL: @vavg_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vavg_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vavg_b(_1, _2); } ++// CHECK-LABEL: @vavg_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vavg_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vavg_h(_1, _2); } ++// CHECK-LABEL: @vavg_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vavg_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vavg_w(_1, _2); } ++// CHECK-LABEL: @vavg_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vavg_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vavg_d(_1, _2); } ++// CHECK-LABEL: @vavg_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vavg_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vavg_bu(_1, _2); ++} ++// CHECK-LABEL: @vavg_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vavg_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vavg_hu(_1, _2); ++} ++// CHECK-LABEL: @vavg_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vavg_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vavg_wu(_1, _2); ++} ++// CHECK-LABEL: @vavg_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vavg_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vavg_du(_1, _2); ++} ++// CHECK-LABEL: @vavgr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vavgr_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vavgr_b(_1, _2); ++} ++// CHECK-LABEL: @vavgr_h( 
++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vavgr_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vavgr_h(_1, _2); ++} ++// CHECK-LABEL: @vavgr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vavgr_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vavgr_w(_1, _2); ++} ++// CHECK-LABEL: @vavgr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vavgr_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vavgr_d(_1, _2); ++} ++// CHECK-LABEL: @vavgr_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vavgr_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vavgr_bu(_1, _2); ++} ++// CHECK-LABEL: @vavgr_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vavgr_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vavgr_hu(_1, _2); ++} ++// CHECK-LABEL: @vavgr_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vavgr_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vavgr_wu(_1, _2); ++} ++// CHECK-LABEL: @vavgr_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vavgr_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vavgr_du(_1, _2); ++} ++// CHECK-LABEL: @vssub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssub_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vssub_b(_1, _2); ++} ++// CHECK-LABEL: @vssub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vssub_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vssub_h(_1, _2); ++} ++// CHECK-LABEL: @vssub_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssub_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vssub_w(_1, _2); ++} ++// CHECK-LABEL: @vssub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vssub_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vssub_d(_1, _2); ++} ++// CHECK-LABEL: @vssub_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// 
++v16u8 vssub_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vssub_bu(_1, _2); ++} ++// CHECK-LABEL: @vssub_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssub_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vssub_hu(_1, _2); ++} ++// CHECK-LABEL: @vssub_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssub_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vssub_wu(_1, _2); ++} ++// CHECK-LABEL: @vssub_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vssub_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vssub_du(_1, _2); ++} ++// CHECK-LABEL: @vabsd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vabsd_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vabsd_b(_1, _2); ++} ++// CHECK-LABEL: @vabsd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vabsd_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vabsd_h(_1, _2); ++} ++// CHECK-LABEL: @vabsd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vabsd_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vabsd_w(_1, _2); ++} ++// CHECK-LABEL: @vabsd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vabsd_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vabsd_d(_1, _2); ++} ++// CHECK-LABEL: @vabsd_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vabsd_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vabsd_bu(_1, _2); ++} ++// CHECK-LABEL: @vabsd_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vabsd_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vabsd_hu(_1, _2); ++} ++// CHECK-LABEL: @vabsd_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vabsd_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vabsd_wu(_1, _2); ++} ++// CHECK-LABEL: @vabsd_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vabsd_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vabsd_du(_1, _2); ++} ++// CHECK-LABEL: @vmul_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 
x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmul_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmul_b(_1, _2); } ++// CHECK-LABEL: @vmul_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmul_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmul_h(_1, _2); } ++// CHECK-LABEL: @vmul_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmul_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmul_w(_1, _2); } ++// CHECK-LABEL: @vmul_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmul_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmul_d(_1, _2); } ++// CHECK-LABEL: @vmadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmadd_b(v16i8 _1, v16i8 _2, v16i8 _3) { ++ return __builtin_lsx_vmadd_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmadd_h(v8i16 _1, v8i16 _2, v8i16 _3) { ++ return __builtin_lsx_vmadd_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmadd_w(v4i32 _1, v4i32 _2, v4i32 _3) { ++ return __builtin_lsx_vmadd_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmadd_d(v2i64 _1, v2i64 _2, v2i64 _3) { ++ return __builtin_lsx_vmadd_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vmsub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmsub_b(v16i8 _1, v16i8 _2, v16i8 _3) { ++ return __builtin_lsx_vmsub_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmsub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmsub_h(v8i16 _1, v8i16 _2, v8i16 _3) { ++ return __builtin_lsx_vmsub_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmsub_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmsub_w(v4i32 _1, v4i32 _2, v4i32 _3) { ++ return __builtin_lsx_vmsub_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail 
call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmsub_d(v2i64 _1, v2i64 _2, v2i64 _3) { ++ return __builtin_lsx_vmsub_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vdiv_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vdiv_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vdiv_b(_1, _2); } ++// CHECK-LABEL: @vdiv_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vdiv_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vdiv_h(_1, _2); } ++// CHECK-LABEL: @vdiv_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vdiv_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vdiv_w(_1, _2); } ++// CHECK-LABEL: @vdiv_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vdiv_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vdiv_d(_1, _2); } ++// CHECK-LABEL: @vdiv_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vdiv_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vdiv_bu(_1, _2); ++} ++// CHECK-LABEL: @vdiv_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vdiv_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vdiv_hu(_1, _2); ++} ++// CHECK-LABEL: @vdiv_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vdiv_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vdiv_wu(_1, _2); ++} ++// CHECK-LABEL: @vdiv_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vdiv_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vdiv_du(_1, _2); ++} ++// CHECK-LABEL: @vhaddw_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vhaddw_h_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vhaddw_h_b(_1, _2); ++} ++// CHECK-LABEL: @vhaddw_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vhaddw_w_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vhaddw_w_h(_1, _2); ++} ++// CHECK-LABEL: @vhaddw_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vhaddw_d_w(v4i32 _1, v4i32 _2) { ++ 
return __builtin_lsx_vhaddw_d_w(_1, _2); ++} ++// CHECK-LABEL: @vhaddw_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vhaddw_hu_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vhaddw_hu_bu(_1, _2); ++} ++// CHECK-LABEL: @vhaddw_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vhaddw_wu_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vhaddw_wu_hu(_1, _2); ++} ++// CHECK-LABEL: @vhaddw_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vhaddw_du_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vhaddw_du_wu(_1, _2); ++} ++// CHECK-LABEL: @vhsubw_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vhsubw_h_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vhsubw_h_b(_1, _2); ++} ++// CHECK-LABEL: @vhsubw_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vhsubw_w_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vhsubw_w_h(_1, _2); ++} ++// CHECK-LABEL: @vhsubw_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vhsubw_d_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vhsubw_d_w(_1, _2); ++} ++// CHECK-LABEL: @vhsubw_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vhsubw_hu_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vhsubw_hu_bu(_1, _2); ++} ++// CHECK-LABEL: @vhsubw_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vhsubw_wu_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vhsubw_wu_hu(_1, _2); ++} ++// CHECK-LABEL: @vhsubw_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vhsubw_du_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vhsubw_du_wu(_1, _2); ++} ++// CHECK-LABEL: @vmod_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmod_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmod_b(_1, _2); } ++// CHECK-LABEL: @vmod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmod_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmod_h(_1, _2); } ++// CHECK-LABEL: @vmod_w( 
++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmod_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmod_w(_1, _2); } ++// CHECK-LABEL: @vmod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmod_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmod_d(_1, _2); } ++// CHECK-LABEL: @vmod_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vmod_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vmod_bu(_1, _2); ++} ++// CHECK-LABEL: @vmod_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vmod_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vmod_hu(_1, _2); ++} ++// CHECK-LABEL: @vmod_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vmod_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vmod_wu(_1, _2); ++} ++// CHECK-LABEL: @vmod_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmod_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vmod_du(_1, _2); ++} ++// CHECK-LABEL: @vreplve_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> [[_1:%.*]], i32 [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vreplve_b(v16i8 _1, int _2) { ++ return __builtin_lsx_vreplve_b(_1, _2); ++} ++// CHECK-LABEL: @vreplve_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> [[_1:%.*]], i32 [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vreplve_h(v8i16 _1, int _2) { ++ return __builtin_lsx_vreplve_h(_1, _2); ++} ++// CHECK-LABEL: @vreplve_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> [[_1:%.*]], i32 [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vreplve_w(v4i32 _1, int _2) { ++ return __builtin_lsx_vreplve_w(_1, _2); ++} ++// CHECK-LABEL: @vreplve_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> [[_1:%.*]], i32 [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vreplve_d(v2i64 _1, int _2) { ++ return __builtin_lsx_vreplve_d(_1, _2); ++} ++// CHECK-LABEL: @vreplvei_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vreplvei_b(v16i8 _1) { return __builtin_lsx_vreplvei_b(_1, 1); } ++// CHECK-LABEL: @vreplvei_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vreplvei_h(v8i16 _1) { return 
__builtin_lsx_vreplvei_h(_1, 1); } ++// CHECK-LABEL: @vreplvei_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vreplvei_w(v4i32 _1) { return __builtin_lsx_vreplvei_w(_1, 1); } ++// CHECK-LABEL: @vreplvei_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vreplvei_d(v2i64 _1) { return __builtin_lsx_vreplvei_d(_1, 1); } ++// CHECK-LABEL: @vpickev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vpickev_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vpickev_b(_1, _2); ++} ++// CHECK-LABEL: @vpickev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vpickev_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vpickev_h(_1, _2); ++} ++// CHECK-LABEL: @vpickev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vpickev_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vpickev_w(_1, _2); ++} ++// CHECK-LABEL: @vpickev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vpickev_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vpickev_d(_1, _2); ++} ++// CHECK-LABEL: @vpickod_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vpickod_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vpickod_b(_1, _2); ++} ++// CHECK-LABEL: @vpickod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vpickod_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vpickod_h(_1, _2); ++} ++// CHECK-LABEL: @vpickod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vpickod_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vpickod_w(_1, _2); ++} ++// CHECK-LABEL: @vpickod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vpickod_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vpickod_d(_1, _2); ++} ++// CHECK-LABEL: @vilvh_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vilvh_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vilvh_b(_1, _2); ++} ++// CHECK-LABEL: @vilvh_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> 
[[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vilvh_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vilvh_h(_1, _2); ++} ++// CHECK-LABEL: @vilvh_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vilvh_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vilvh_w(_1, _2); ++} ++// CHECK-LABEL: @vilvh_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vilvh_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vilvh_d(_1, _2); ++} ++// CHECK-LABEL: @vilvl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vilvl_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vilvl_b(_1, _2); ++} ++// CHECK-LABEL: @vilvl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vilvl_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vilvl_h(_1, _2); ++} ++// CHECK-LABEL: @vilvl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vilvl_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vilvl_w(_1, _2); ++} ++// CHECK-LABEL: @vilvl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vilvl_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vilvl_d(_1, _2); ++} ++// CHECK-LABEL: @vpackev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vpackev_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vpackev_b(_1, _2); ++} ++// CHECK-LABEL: @vpackev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vpackev_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vpackev_h(_1, _2); ++} ++// CHECK-LABEL: @vpackev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vpackev_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vpackev_w(_1, _2); ++} ++// CHECK-LABEL: @vpackev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vpackev_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vpackev_d(_1, _2); ++} ++// CHECK-LABEL: @vpackod_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vpackod_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vpackod_b(_1, _2); ++} ++// 
CHECK-LABEL: @vpackod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vpackod_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vpackod_h(_1, _2); ++} ++// CHECK-LABEL: @vpackod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vpackod_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vpackod_w(_1, _2); ++} ++// CHECK-LABEL: @vpackod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vpackod_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vpackod_d(_1, _2); ++} ++// CHECK-LABEL: @vshuf_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vshuf_h(v8i16 _1, v8i16 _2, v8i16 _3) { ++ return __builtin_lsx_vshuf_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vshuf_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vshuf_w(v4i32 _1, v4i32 _2, v4i32 _3) { ++ return __builtin_lsx_vshuf_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vshuf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vshuf_d(v2i64 _1, v2i64 _2, v2i64 _3) { ++ return __builtin_lsx_vshuf_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vand_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vand_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vand_v(_1, _2); } ++// CHECK-LABEL: @vandi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vandi_b(v16u8 _1) { return __builtin_lsx_vandi_b(_1, 1); } ++// CHECK-LABEL: @vor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vor_v(_1, _2); } ++// CHECK-LABEL: @vori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vori_b(v16u8 _1) { return __builtin_lsx_vori_b(_1, 1); } ++// CHECK-LABEL: @vnor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vnor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vnor_v(_1, _2); } ++// CHECK-LABEL: @vnori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: 
ret <16 x i8> [[TMP0]] ++// ++v16u8 vnori_b(v16u8 _1) { return __builtin_lsx_vnori_b(_1, 1); } ++// CHECK-LABEL: @vxor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vxor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vxor_v(_1, _2); } ++// CHECK-LABEL: @vxori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vxori_b(v16u8 _1) { return __builtin_lsx_vxori_b(_1, 1); } ++// CHECK-LABEL: @vbitsel_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vbitsel_v(v16u8 _1, v16u8 _2, v16u8 _3) { ++ return __builtin_lsx_vbitsel_v(_1, _2, _3); ++} ++// CHECK-LABEL: @vbitseli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vbitseli_b(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vbitseli_b(_1, _2, 1); ++} ++// CHECK-LABEL: @vshuf4i_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vshuf4i_b(v16i8 _1) { return __builtin_lsx_vshuf4i_b(_1, 1); } ++// CHECK-LABEL: @vshuf4i_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vshuf4i_h(v8i16 _1) { return __builtin_lsx_vshuf4i_h(_1, 1); } ++// CHECK-LABEL: @vshuf4i_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vshuf4i_w(v4i32 _1) { return __builtin_lsx_vshuf4i_w(_1, 1); } ++// CHECK-LABEL: @vreplgr2vr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vreplgr2vr_b(int _1) { return __builtin_lsx_vreplgr2vr_b(_1); } ++// CHECK-LABEL: @vreplgr2vr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vreplgr2vr_h(int _1) { return __builtin_lsx_vreplgr2vr_h(_1); } ++// CHECK-LABEL: @vreplgr2vr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vreplgr2vr_w(int _1) { return __builtin_lsx_vreplgr2vr_w(_1); } ++// CHECK-LABEL: @vreplgr2vr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vreplgr2vr_d(long _1) { return __builtin_lsx_vreplgr2vr_d(_1); } ++// CHECK-LABEL: @vpcnt_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vpcnt_b(v16i8 _1) { return __builtin_lsx_vpcnt_b(_1); } ++// 
CHECK-LABEL: @vpcnt_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vpcnt_h(v8i16 _1) { return __builtin_lsx_vpcnt_h(_1); } ++// CHECK-LABEL: @vpcnt_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vpcnt_w(v4i32 _1) { return __builtin_lsx_vpcnt_w(_1); } ++// CHECK-LABEL: @vpcnt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vpcnt_d(v2i64 _1) { return __builtin_lsx_vpcnt_d(_1); } ++// CHECK-LABEL: @vclo_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vclo_b(v16i8 _1) { return __builtin_lsx_vclo_b(_1); } ++// CHECK-LABEL: @vclo_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vclo_h(v8i16 _1) { return __builtin_lsx_vclo_h(_1); } ++// CHECK-LABEL: @vclo_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vclo_w(v4i32 _1) { return __builtin_lsx_vclo_w(_1); } ++// CHECK-LABEL: @vclo_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vclo_d(v2i64 _1) { return __builtin_lsx_vclo_d(_1); } ++// CHECK-LABEL: @vclz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vclz_b(v16i8 _1) { return __builtin_lsx_vclz_b(_1); } ++// CHECK-LABEL: @vclz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vclz_h(v8i16 _1) { return __builtin_lsx_vclz_h(_1); } ++// CHECK-LABEL: @vclz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vclz_w(v4i32 _1) { return __builtin_lsx_vclz_w(_1); } ++// CHECK-LABEL: @vclz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vclz_d(v2i64 _1) { return __builtin_lsx_vclz_d(_1); } ++// CHECK-LABEL: @vpickve2gr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int vpickve2gr_b(v16i8 _1) { return __builtin_lsx_vpickve2gr_b(_1, 1); } ++// CHECK-LABEL: @vpickve2gr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int vpickve2gr_h(v8i16 _1) { return __builtin_lsx_vpickve2gr_h(_1, 1); } ++// CHECK-LABEL: @vpickve2gr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x 
i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int vpickve2gr_w(v4i32 _1) { return __builtin_lsx_vpickve2gr_w(_1, 1); } ++// CHECK-LABEL: @vpickve2gr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i64 [[TMP0]] ++// ++long vpickve2gr_d(v2i64 _1) { return __builtin_lsx_vpickve2gr_d(_1, 1); } ++// CHECK-LABEL: @vpickve2gr_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++unsigned int vpickve2gr_bu(v16i8 _1) { ++ return __builtin_lsx_vpickve2gr_bu(_1, 1); ++} ++// CHECK-LABEL: @vpickve2gr_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++unsigned int vpickve2gr_hu(v8i16 _1) { ++ return __builtin_lsx_vpickve2gr_hu(_1, 1); ++} ++// CHECK-LABEL: @vpickve2gr_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++unsigned int vpickve2gr_wu(v4i32 _1) { ++ return __builtin_lsx_vpickve2gr_wu(_1, 1); ++} ++// CHECK-LABEL: @vpickve2gr_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i64 [[TMP0]] ++// ++unsigned long int vpickve2gr_du(v2i64 _1) { ++ return __builtin_lsx_vpickve2gr_du(_1, 1); ++} ++// CHECK-LABEL: @vinsgr2vr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> [[_1:%.*]], i32 1, i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vinsgr2vr_b(v16i8 _1) { ++ return __builtin_lsx_vinsgr2vr_b(_1, 1, 1); ++} ++// CHECK-LABEL: @vinsgr2vr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> [[_1:%.*]], i32 1, i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vinsgr2vr_h(v8i16 _1) { ++ return __builtin_lsx_vinsgr2vr_h(_1, 1, 1); ++} ++// CHECK-LABEL: @vinsgr2vr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> [[_1:%.*]], i32 1, i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vinsgr2vr_w(v4i32 _1) { ++ return __builtin_lsx_vinsgr2vr_w(_1, 1, 1); ++} ++// CHECK-LABEL: @vinsgr2vr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> [[_1:%.*]], i64 1, i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vinsgr2vr_d(v2i64 _1) { ++ return __builtin_lsx_vinsgr2vr_d(_1, 1, 1); ++} ++// CHECK-LABEL: @vfadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfadd_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfadd_s(_1, _2); ++} ++// CHECK-LABEL: @vfadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfadd_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfadd_d(_1, _2); ++} ++// CHECK-LABEL: @vfsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x 
float> @llvm.loongarch.lsx.vfsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfsub_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfsub_s(_1, _2); ++} ++// CHECK-LABEL: @vfsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfsub_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfsub_d(_1, _2); ++} ++// CHECK-LABEL: @vfmul_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfmul_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfmul_s(_1, _2); ++} ++// CHECK-LABEL: @vfmul_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfmul_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfmul_d(_1, _2); ++} ++// CHECK-LABEL: @vfdiv_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfdiv_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfdiv_s(_1, _2); ++} ++// CHECK-LABEL: @vfdiv_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfdiv_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfdiv_d(_1, _2); ++} ++// CHECK-LABEL: @vfcvt_h_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vfcvt_h_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcvt_h_s(_1, _2); ++} ++// CHECK-LABEL: @vfcvt_s_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfcvt_s_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcvt_s_d(_1, _2); ++} ++// CHECK-LABEL: @vfmin_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfmin_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfmin_s(_1, _2); ++} ++// CHECK-LABEL: @vfmin_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfmin_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfmin_d(_1, _2); ++} ++// CHECK-LABEL: @vfmina_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfmina_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfmina_s(_1, _2); ++} ++// CHECK-LABEL: @vfmina_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) 
++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfmina_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfmina_d(_1, _2); ++} ++// CHECK-LABEL: @vfmax_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfmax_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfmax_s(_1, _2); ++} ++// CHECK-LABEL: @vfmax_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfmax_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfmax_d(_1, _2); ++} ++// CHECK-LABEL: @vfmaxa_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfmaxa_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfmaxa_s(_1, _2); ++} ++// CHECK-LABEL: @vfmaxa_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfmaxa_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfmaxa_d(_1, _2); ++} ++// CHECK-LABEL: @vfclass_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfclass_s(v4f32 _1) { return __builtin_lsx_vfclass_s(_1); } ++// CHECK-LABEL: @vfclass_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfclass_d(v2f64 _1) { return __builtin_lsx_vfclass_d(_1); } ++// CHECK-LABEL: @vfsqrt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfsqrt_s(v4f32 _1) { return __builtin_lsx_vfsqrt_s(_1); } ++// CHECK-LABEL: @vfsqrt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfsqrt_d(v2f64 _1) { return __builtin_lsx_vfsqrt_d(_1); } ++// CHECK-LABEL: @vfrecip_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfrecip_s(v4f32 _1) { return __builtin_lsx_vfrecip_s(_1); } ++// CHECK-LABEL: @vfrecip_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfrecip_d(v2f64 _1) { return __builtin_lsx_vfrecip_d(_1); } ++// CHECK-LABEL: @vfrint_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfrint_s(v4f32 _1) { return __builtin_lsx_vfrint_s(_1); } ++// CHECK-LABEL: @vfrint_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 
vfrint_d(v2f64 _1) { return __builtin_lsx_vfrint_d(_1); } ++// CHECK-LABEL: @vfrsqrt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfrsqrt_s(v4f32 _1) { return __builtin_lsx_vfrsqrt_s(_1); } ++// CHECK-LABEL: @vfrsqrt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfrsqrt_d(v2f64 _1) { return __builtin_lsx_vfrsqrt_d(_1); } ++// CHECK-LABEL: @vflogb_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vflogb_s(v4f32 _1) { return __builtin_lsx_vflogb_s(_1); } ++// CHECK-LABEL: @vflogb_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vflogb_d(v2f64 _1) { return __builtin_lsx_vflogb_d(_1); } ++// CHECK-LABEL: @vfcvth_s_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfcvth_s_h(v8i16 _1) { return __builtin_lsx_vfcvth_s_h(_1); } ++// CHECK-LABEL: @vfcvth_d_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfcvth_d_s(v4f32 _1) { return __builtin_lsx_vfcvth_d_s(_1); } ++// CHECK-LABEL: @vfcvtl_s_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfcvtl_s_h(v8i16 _1) { return __builtin_lsx_vfcvtl_s_h(_1); } ++// CHECK-LABEL: @vfcvtl_d_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfcvtl_d_s(v4f32 _1) { return __builtin_lsx_vfcvtl_d_s(_1); } ++// CHECK-LABEL: @vftint_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftint_w_s(v4f32 _1) { return __builtin_lsx_vftint_w_s(_1); } ++// CHECK-LABEL: @vftint_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftint_l_d(v2f64 _1) { return __builtin_lsx_vftint_l_d(_1); } ++// CHECK-LABEL: @vftint_wu_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vftint_wu_s(v4f32 _1) { return __builtin_lsx_vftint_wu_s(_1); } ++// CHECK-LABEL: @vftint_lu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vftint_lu_d(v2f64 _1) { return __builtin_lsx_vftint_lu_d(_1); } ++// CHECK-LABEL: @vftintrz_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x 
float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftintrz_w_s(v4f32 _1) { return __builtin_lsx_vftintrz_w_s(_1); } ++// CHECK-LABEL: @vftintrz_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrz_l_d(v2f64 _1) { return __builtin_lsx_vftintrz_l_d(_1); } ++// CHECK-LABEL: @vftintrz_wu_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vftintrz_wu_s(v4f32 _1) { return __builtin_lsx_vftintrz_wu_s(_1); } ++// CHECK-LABEL: @vftintrz_lu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vftintrz_lu_d(v2f64 _1) { return __builtin_lsx_vftintrz_lu_d(_1); } ++// CHECK-LABEL: @vffint_s_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vffint_s_w(v4i32 _1) { return __builtin_lsx_vffint_s_w(_1); } ++// CHECK-LABEL: @vffint_d_l( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vffint_d_l(v2i64 _1) { return __builtin_lsx_vffint_d_l(_1); } ++// CHECK-LABEL: @vffint_s_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vffint_s_wu(v4u32 _1) { return __builtin_lsx_vffint_s_wu(_1); } ++// CHECK-LABEL: @vffint_d_lu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vffint_d_lu(v2u64 _1) { return __builtin_lsx_vffint_d_lu(_1); } ++// CHECK-LABEL: @vandn_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vandn_v(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vandn_v(_1, _2); ++} ++// CHECK-LABEL: @vneg_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vneg_b(v16i8 _1) { return __builtin_lsx_vneg_b(_1); } ++// CHECK-LABEL: @vneg_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vneg_h(v8i16 _1) { return __builtin_lsx_vneg_h(_1); } ++// CHECK-LABEL: @vneg_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vneg_w(v4i32 _1) { return __builtin_lsx_vneg_w(_1); } ++// CHECK-LABEL: @vneg_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vneg_d(v2i64 _1) { return __builtin_lsx_vneg_d(_1); } ++// CHECK-LABEL: @vmuh_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = 
tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmuh_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmuh_b(_1, _2); } ++// CHECK-LABEL: @vmuh_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmuh_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmuh_h(_1, _2); } ++// CHECK-LABEL: @vmuh_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmuh_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmuh_w(_1, _2); } ++// CHECK-LABEL: @vmuh_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmuh_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmuh_d(_1, _2); } ++// CHECK-LABEL: @vmuh_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vmuh_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vmuh_bu(_1, _2); ++} ++// CHECK-LABEL: @vmuh_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vmuh_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vmuh_hu(_1, _2); ++} ++// CHECK-LABEL: @vmuh_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vmuh_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vmuh_wu(_1, _2); ++} ++// CHECK-LABEL: @vmuh_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmuh_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vmuh_du(_1, _2); ++} ++// CHECK-LABEL: @vsllwil_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsllwil_h_b(v16i8 _1) { return __builtin_lsx_vsllwil_h_b(_1, 1); } ++// CHECK-LABEL: @vsllwil_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsllwil_w_h(v8i16 _1) { return __builtin_lsx_vsllwil_w_h(_1, 1); } ++// CHECK-LABEL: @vsllwil_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsllwil_d_w(v4i32 _1) { return __builtin_lsx_vsllwil_d_w(_1, 1); } ++// CHECK-LABEL: @vsllwil_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vsllwil_hu_bu(v16u8 _1) { ++ return __builtin_lsx_vsllwil_hu_bu(_1, 1); ++} ++// CHECK-LABEL: @vsllwil_wu_hu( ++// CHECK-NEXT: entry: ++// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vsllwil_wu_hu(v8u16 _1) { ++ return __builtin_lsx_vsllwil_wu_hu(_1, 1); ++} ++// CHECK-LABEL: @vsllwil_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vsllwil_du_wu(v4u32 _1) { ++ return __builtin_lsx_vsllwil_du_wu(_1, 1); ++} ++// CHECK-LABEL: @vsran_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsran_b_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsran_b_h(_1, _2); ++} ++// CHECK-LABEL: @vsran_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsran_h_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsran_h_w(_1, _2); ++} ++// CHECK-LABEL: @vsran_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsran_w_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsran_w_d(_1, _2); ++} ++// CHECK-LABEL: @vssran_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssran_b_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vssran_b_h(_1, _2); ++} ++// CHECK-LABEL: @vssran_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vssran_h_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vssran_h_w(_1, _2); ++} ++// CHECK-LABEL: @vssran_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssran_w_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vssran_w_d(_1, _2); ++} ++// CHECK-LABEL: @vssran_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssran_bu_h(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vssran_bu_h(_1, _2); ++} ++// CHECK-LABEL: @vssran_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssran_hu_w(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vssran_hu_w(_1, _2); ++} ++// CHECK-LABEL: @vssran_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssran_wu_d(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vssran_wu_d(_1, _2); ++} ++// CHECK-LABEL: @vsrarn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> 
[[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrarn_b_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsrarn_b_h(_1, _2); ++} ++// CHECK-LABEL: @vsrarn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrarn_h_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsrarn_h_w(_1, _2); ++} ++// CHECK-LABEL: @vsrarn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrarn_w_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsrarn_w_d(_1, _2); ++} ++// CHECK-LABEL: @vssrarn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssrarn_b_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vssrarn_b_h(_1, _2); ++} ++// CHECK-LABEL: @vssrarn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vssrarn_h_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vssrarn_h_w(_1, _2); ++} ++// CHECK-LABEL: @vssrarn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssrarn_w_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vssrarn_w_d(_1, _2); ++} ++// CHECK-LABEL: @vssrarn_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssrarn_bu_h(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vssrarn_bu_h(_1, _2); ++} ++// CHECK-LABEL: @vssrarn_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssrarn_hu_w(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vssrarn_hu_w(_1, _2); ++} ++// CHECK-LABEL: @vssrarn_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssrarn_wu_d(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vssrarn_wu_d(_1, _2); ++} ++// CHECK-LABEL: @vsrln_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrln_b_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsrln_b_h(_1, _2); ++} ++// CHECK-LABEL: @vsrln_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrln_h_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsrln_h_w(_1, _2); ++} ++// CHECK-LABEL: @vsrln_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] 
++// ++v4i32 vsrln_w_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsrln_w_d(_1, _2); ++} ++// CHECK-LABEL: @vssrln_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssrln_bu_h(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vssrln_bu_h(_1, _2); ++} ++// CHECK-LABEL: @vssrln_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssrln_hu_w(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vssrln_hu_w(_1, _2); ++} ++// CHECK-LABEL: @vssrln_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssrln_wu_d(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vssrln_wu_d(_1, _2); ++} ++// CHECK-LABEL: @vsrlrn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrlrn_b_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsrlrn_b_h(_1, _2); ++} ++// CHECK-LABEL: @vsrlrn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrlrn_h_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsrlrn_h_w(_1, _2); ++} ++// CHECK-LABEL: @vsrlrn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrlrn_w_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsrlrn_w_d(_1, _2); ++} ++// CHECK-LABEL: @vssrlrn_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssrlrn_bu_h(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vssrlrn_bu_h(_1, _2); ++} ++// CHECK-LABEL: @vssrlrn_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssrlrn_hu_w(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vssrlrn_hu_w(_1, _2); ++} ++// CHECK-LABEL: @vssrlrn_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssrlrn_wu_d(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vssrlrn_wu_d(_1, _2); ++} ++// CHECK-LABEL: @vfrstpi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vfrstpi_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vfrstpi_b(_1, _2, 1); ++} ++// CHECK-LABEL: @vfrstpi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vfrstpi_h(v8i16 _1, 
v8i16 _2) { ++ return __builtin_lsx_vfrstpi_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vfrstp_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vfrstp_b(v16i8 _1, v16i8 _2, v16i8 _3) { ++ return __builtin_lsx_vfrstp_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vfrstp_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vfrstp_h(v8i16 _1, v8i16 _2, v8i16 _3) { ++ return __builtin_lsx_vfrstp_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vshuf4i_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vshuf4i_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vshuf4i_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vbsrl_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vbsrl_v(v16i8 _1) { return __builtin_lsx_vbsrl_v(_1, 1); } ++// CHECK-LABEL: @vbsll_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vbsll_v(v16i8 _1) { return __builtin_lsx_vbsll_v(_1, 1); } ++// CHECK-LABEL: @vextrins_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vextrins_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vextrins_b(_1, _2, 1); ++} ++// CHECK-LABEL: @vextrins_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vextrins_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vextrins_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vextrins_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vextrins_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vextrins_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vextrins_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vextrins_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vextrins_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vmskltz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmskltz_b(v16i8 _1) { return __builtin_lsx_vmskltz_b(_1); } ++// CHECK-LABEL: @vmskltz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmskltz_h(v8i16 _1) { return __builtin_lsx_vmskltz_h(_1); } ++// CHECK-LABEL: @vmskltz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail 
call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmskltz_w(v4i32 _1) { return __builtin_lsx_vmskltz_w(_1); } ++// CHECK-LABEL: @vmskltz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmskltz_d(v2i64 _1) { return __builtin_lsx_vmskltz_d(_1); } ++// CHECK-LABEL: @vsigncov_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsigncov_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vsigncov_b(_1, _2); ++} ++// CHECK-LABEL: @vsigncov_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsigncov_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsigncov_h(_1, _2); ++} ++// CHECK-LABEL: @vsigncov_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsigncov_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsigncov_w(_1, _2); ++} ++// CHECK-LABEL: @vsigncov_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsigncov_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsigncov_d(_1, _2); ++} ++// CHECK-LABEL: @vfmadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { ++ return __builtin_lsx_vfmadd_s(_1, _2, _3); ++} ++// CHECK-LABEL: @vfmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { ++ return __builtin_lsx_vfmadd_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vfmsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { ++ return __builtin_lsx_vfmsub_s(_1, _2, _3); ++} ++// CHECK-LABEL: @vfmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { ++ return __builtin_lsx_vfmsub_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vfnmadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfnmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { ++ return __builtin_lsx_vfnmadd_s(_1, _2, _3); ++} ++// CHECK-LABEL: @vfnmadd_d( ++// CHECK-NEXT: entry: 
++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfnmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { ++ return __builtin_lsx_vfnmadd_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vfnmsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfnmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { ++ return __builtin_lsx_vfnmsub_s(_1, _2, _3); ++} ++// CHECK-LABEL: @vfnmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfnmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { ++ return __builtin_lsx_vfnmsub_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vftintrne_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftintrne_w_s(v4f32 _1) { return __builtin_lsx_vftintrne_w_s(_1); } ++// CHECK-LABEL: @vftintrne_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrne_l_d(v2f64 _1) { return __builtin_lsx_vftintrne_l_d(_1); } ++// CHECK-LABEL: @vftintrp_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftintrp_w_s(v4f32 _1) { return __builtin_lsx_vftintrp_w_s(_1); } ++// CHECK-LABEL: @vftintrp_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrp_l_d(v2f64 _1) { return __builtin_lsx_vftintrp_l_d(_1); } ++// CHECK-LABEL: @vftintrm_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftintrm_w_s(v4f32 _1) { return __builtin_lsx_vftintrm_w_s(_1); } ++// CHECK-LABEL: @vftintrm_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrm_l_d(v2f64 _1) { return __builtin_lsx_vftintrm_l_d(_1); } ++// CHECK-LABEL: @vftint_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftint_w_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vftint_w_d(_1, _2); ++} ++// CHECK-LABEL: @vffint_s_l( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vffint_s_l(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vffint_s_l(_1, _2); ++} ++// CHECK-LABEL: @vftintrz_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> 
[[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftintrz_w_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vftintrz_w_d(_1, _2); ++} ++// CHECK-LABEL: @vftintrp_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftintrp_w_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vftintrp_w_d(_1, _2); ++} ++// CHECK-LABEL: @vftintrm_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftintrm_w_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vftintrm_w_d(_1, _2); ++} ++// CHECK-LABEL: @vftintrne_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftintrne_w_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vftintrne_w_d(_1, _2); ++} ++// CHECK-LABEL: @vftintl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintl_l_s(v4f32 _1) { return __builtin_lsx_vftintl_l_s(_1); } ++// CHECK-LABEL: @vftinth_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftinth_l_s(v4f32 _1) { return __builtin_lsx_vftinth_l_s(_1); } ++// CHECK-LABEL: @vffinth_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vffinth_d_w(v4i32 _1) { return __builtin_lsx_vffinth_d_w(_1); } ++// CHECK-LABEL: @vffintl_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vffintl_d_w(v4i32 _1) { return __builtin_lsx_vffintl_d_w(_1); } ++// CHECK-LABEL: @vftintrzl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrzl_l_s(v4f32 _1) { return __builtin_lsx_vftintrzl_l_s(_1); } ++// CHECK-LABEL: @vftintrzh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrzh_l_s(v4f32 _1) { return __builtin_lsx_vftintrzh_l_s(_1); } ++// CHECK-LABEL: @vftintrpl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrpl_l_s(v4f32 _1) { return __builtin_lsx_vftintrpl_l_s(_1); } ++// CHECK-LABEL: @vftintrph_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrph_l_s(v4f32 _1) { return __builtin_lsx_vftintrph_l_s(_1); } ++// CHECK-LABEL: @vftintrml_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail 
call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrml_l_s(v4f32 _1) { return __builtin_lsx_vftintrml_l_s(_1); } ++// CHECK-LABEL: @vftintrmh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrmh_l_s(v4f32 _1) { return __builtin_lsx_vftintrmh_l_s(_1); } ++// CHECK-LABEL: @vftintrnel_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrnel_l_s(v4f32 _1) { ++ return __builtin_lsx_vftintrnel_l_s(_1); ++} ++// CHECK-LABEL: @vftintrneh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrneh_l_s(v4f32 _1) { ++ return __builtin_lsx_vftintrneh_l_s(_1); ++} ++// CHECK-LABEL: @vfrintrne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> ++// CHECK-NEXT: ret <4 x i32> [[TMP1]] ++// ++v4i32 vfrintrne_s(v4f32 _1) { return __builtin_lsx_vfrintrne_s(_1); } ++// CHECK-LABEL: @vfrintrne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> ++// CHECK-NEXT: ret <2 x i64> [[TMP1]] ++// ++v2i64 vfrintrne_d(v2f64 _1) { return __builtin_lsx_vfrintrne_d(_1); } ++// CHECK-LABEL: @vfrintrz_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> ++// CHECK-NEXT: ret <4 x i32> [[TMP1]] ++// ++v4i32 vfrintrz_s(v4f32 _1) { return __builtin_lsx_vfrintrz_s(_1); } ++// CHECK-LABEL: @vfrintrz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> ++// CHECK-NEXT: ret <2 x i64> [[TMP1]] ++// ++v2i64 vfrintrz_d(v2f64 _1) { return __builtin_lsx_vfrintrz_d(_1); } ++// CHECK-LABEL: @vfrintrp_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> ++// CHECK-NEXT: ret <4 x i32> [[TMP1]] ++// ++v4i32 vfrintrp_s(v4f32 _1) { return __builtin_lsx_vfrintrp_s(_1); } ++// CHECK-LABEL: @vfrintrp_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> ++// CHECK-NEXT: ret <2 x i64> [[TMP1]] ++// ++v2i64 vfrintrp_d(v2f64 _1) { return __builtin_lsx_vfrintrp_d(_1); } ++// CHECK-LABEL: @vfrintrm_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> ++// CHECK-NEXT: ret <4 x i32> [[TMP1]] ++// ++v4i32 vfrintrm_s(v4f32 _1) { return 
__builtin_lsx_vfrintrm_s(_1); } ++// CHECK-LABEL: @vfrintrm_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> ++// CHECK-NEXT: ret <2 x i64> [[TMP1]] ++// ++v2i64 vfrintrm_d(v2f64 _1) { return __builtin_lsx_vfrintrm_d(_1); } ++// CHECK-LABEL: @vstelm_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1, i32 1) ++// CHECK-NEXT: ret void ++// ++void vstelm_b(v16i8 _1, void *_2) { ++ return __builtin_lsx_vstelm_b(_1, _2, 1, 1); ++} ++// CHECK-LABEL: @vstelm_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> [[_1:%.*]], ptr [[_2:%.*]], i32 2, i32 1) ++// CHECK-NEXT: ret void ++// ++void vstelm_h(v8i16 _1, void *_2) { ++ return __builtin_lsx_vstelm_h(_1, _2, 2, 1); ++} ++// CHECK-LABEL: @vstelm_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> [[_1:%.*]], ptr [[_2:%.*]], i32 4, i32 1) ++// CHECK-NEXT: ret void ++// ++void vstelm_w(v4i32 _1, void *_2) { ++ return __builtin_lsx_vstelm_w(_1, _2, 4, 1); ++} ++// CHECK-LABEL: @vstelm_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> [[_1:%.*]], ptr [[_2:%.*]], i32 8, i32 1) ++// CHECK-NEXT: ret void ++// ++void vstelm_d(v2i64 _1, void *_2) { ++ return __builtin_lsx_vstelm_d(_1, _2, 8, 1); ++} ++// CHECK-LABEL: @vaddwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwev_d_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vaddwev_d_w(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vaddwev_w_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vaddwev_w_h(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vaddwev_h_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vaddwev_h_b(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwod_d_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vaddwod_d_w(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vaddwod_w_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vaddwod_w_h(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vaddwod_h_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vaddwod_h_b(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call 
<2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwev_d_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vaddwev_d_wu(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vaddwev_w_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vaddwev_w_hu(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vaddwev_h_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vaddwev_h_bu(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwod_d_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vaddwod_d_wu(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vaddwod_w_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vaddwod_w_hu(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vaddwod_h_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vaddwod_h_bu(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwev_d_wu_w(v4u32 _1, v4i32 _2) { ++ return __builtin_lsx_vaddwev_d_wu_w(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vaddwev_w_hu_h(v8u16 _1, v8i16 _2) { ++ return __builtin_lsx_vaddwev_w_hu_h(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vaddwev_h_bu_b(v16u8 _1, v16i8 _2) { ++ return __builtin_lsx_vaddwev_h_bu_b(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwod_d_wu_w(v4u32 _1, v4i32 _2) { ++ return __builtin_lsx_vaddwod_d_wu_w(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vaddwod_w_hu_h(v8u16 _1, v8i16 _2) { ++ return __builtin_lsx_vaddwod_w_hu_h(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_h_bu_b( ++// 
CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vaddwod_h_bu_b(v16u8 _1, v16i8 _2) { ++ return __builtin_lsx_vaddwod_h_bu_b(_1, _2); ++} ++// CHECK-LABEL: @vsubwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubwev_d_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsubwev_d_w(_1, _2); ++} ++// CHECK-LABEL: @vsubwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsubwev_w_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsubwev_w_h(_1, _2); ++} ++// CHECK-LABEL: @vsubwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsubwev_h_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vsubwev_h_b(_1, _2); ++} ++// CHECK-LABEL: @vsubwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubwod_d_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsubwod_d_w(_1, _2); ++} ++// CHECK-LABEL: @vsubwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsubwod_w_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsubwod_w_h(_1, _2); ++} ++// CHECK-LABEL: @vsubwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsubwod_h_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vsubwod_h_b(_1, _2); ++} ++// CHECK-LABEL: @vsubwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubwev_d_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vsubwev_d_wu(_1, _2); ++} ++// CHECK-LABEL: @vsubwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsubwev_w_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vsubwev_w_hu(_1, _2); ++} ++// CHECK-LABEL: @vsubwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsubwev_h_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vsubwev_h_bu(_1, _2); ++} ++// CHECK-LABEL: @vsubwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubwod_d_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vsubwod_d_wu(_1, _2); ++} ++// CHECK-LABEL: @vsubwod_w_hu( ++// 
CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsubwod_w_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vsubwod_w_hu(_1, _2); ++} ++// CHECK-LABEL: @vsubwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsubwod_h_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vsubwod_h_bu(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwev_q_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vaddwev_q_d(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwod_q_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vaddwod_q_d(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwev_q_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vaddwev_q_du(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwod_q_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vaddwod_q_du(_1, _2); ++} ++// CHECK-LABEL: @vsubwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubwev_q_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsubwev_q_d(_1, _2); ++} ++// CHECK-LABEL: @vsubwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubwod_q_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsubwod_q_d(_1, _2); ++} ++// CHECK-LABEL: @vsubwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubwev_q_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vsubwev_q_du(_1, _2); ++} ++// CHECK-LABEL: @vsubwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubwod_q_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vsubwod_q_du(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwev_q_du_d(v2u64 _1, v2i64 _2) { ++ return __builtin_lsx_vaddwev_q_du_d(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_q_du_d( 
++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwod_q_du_d(v2u64 _1, v2i64 _2) { ++ return __builtin_lsx_vaddwod_q_du_d(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwev_d_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vmulwev_d_w(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmulwev_w_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vmulwev_w_h(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmulwev_h_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vmulwev_h_b(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwod_d_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vmulwod_d_w(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmulwod_w_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vmulwod_w_h(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmulwod_h_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vmulwod_h_b(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwev_d_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vmulwev_d_wu(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmulwev_w_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vmulwev_w_hu(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmulwev_h_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vmulwev_h_bu(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwod_d_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vmulwod_d_wu(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_w_hu( ++// 
CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmulwod_w_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vmulwod_w_hu(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmulwod_h_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vmulwod_h_bu(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwev_d_wu_w(v4u32 _1, v4i32 _2) { ++ return __builtin_lsx_vmulwev_d_wu_w(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmulwev_w_hu_h(v8u16 _1, v8i16 _2) { ++ return __builtin_lsx_vmulwev_w_hu_h(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmulwev_h_bu_b(v16u8 _1, v16i8 _2) { ++ return __builtin_lsx_vmulwev_h_bu_b(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwod_d_wu_w(v4u32 _1, v4i32 _2) { ++ return __builtin_lsx_vmulwod_d_wu_w(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmulwod_w_hu_h(v8u16 _1, v8i16 _2) { ++ return __builtin_lsx_vmulwod_w_hu_h(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmulwod_h_bu_b(v16u8 _1, v16i8 _2) { ++ return __builtin_lsx_vmulwod_h_bu_b(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwev_q_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vmulwev_q_d(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwod_q_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vmulwod_q_d(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwev_q_du(v2u64 _1, v2u64 _2) { ++ return 
__builtin_lsx_vmulwev_q_du(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwod_q_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vmulwod_q_du(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwev_q_du_d(v2u64 _1, v2i64 _2) { ++ return __builtin_lsx_vmulwev_q_du_d(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwod_q_du_d(v2u64 _1, v2i64 _2) { ++ return __builtin_lsx_vmulwod_q_du_d(_1, _2); ++} ++// CHECK-LABEL: @vhaddw_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vhaddw_q_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vhaddw_q_d(_1, _2); ++} ++// CHECK-LABEL: @vhaddw_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vhaddw_qu_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vhaddw_qu_du(_1, _2); ++} ++// CHECK-LABEL: @vhsubw_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vhsubw_q_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vhsubw_q_d(_1, _2); ++} ++// CHECK-LABEL: @vhsubw_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vhsubw_qu_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vhsubw_qu_du(_1, _2); ++} ++// CHECK-LABEL: @vmaddwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaddwev_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { ++ return __builtin_lsx_vmaddwev_d_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmaddwev_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { ++ return __builtin_lsx_vmaddwev_w_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmaddwev_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { ++ return __builtin_lsx_vmaddwev_h_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> [[_1:%.*]], 
<4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmaddwev_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { ++ return __builtin_lsx_vmaddwev_d_wu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vmaddwev_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { ++ return __builtin_lsx_vmaddwev_w_hu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vmaddwev_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { ++ return __builtin_lsx_vmaddwev_h_bu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaddwod_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { ++ return __builtin_lsx_vmaddwod_d_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmaddwod_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { ++ return __builtin_lsx_vmaddwod_w_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmaddwod_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { ++ return __builtin_lsx_vmaddwod_h_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmaddwod_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { ++ return __builtin_lsx_vmaddwod_d_wu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vmaddwod_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { ++ return __builtin_lsx_vmaddwod_w_hu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vmaddwod_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { ++ return __builtin_lsx_vmaddwod_h_bu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaddwev_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { ++ return __builtin_lsx_vmaddwev_d_wu_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmaddwev_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { ++ return __builtin_lsx_vmaddwev_w_hu_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmaddwev_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { ++ return __builtin_lsx_vmaddwev_h_bu_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaddwod_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { ++ return __builtin_lsx_vmaddwod_d_wu_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmaddwod_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { ++ return __builtin_lsx_vmaddwod_w_hu_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmaddwod_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { ++ return __builtin_lsx_vmaddwod_h_bu_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaddwev_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { ++ return __builtin_lsx_vmaddwev_q_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaddwod_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { ++ return __builtin_lsx_vmaddwod_q_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmaddwev_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { ++ return __builtin_lsx_vmaddwev_q_du(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmaddwod_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { ++ return __builtin_lsx_vmaddwod_q_du(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaddwev_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) { ++ 
return __builtin_lsx_vmaddwev_q_du_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaddwod_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) { ++ return __builtin_lsx_vmaddwod_q_du_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vrotr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vrotr_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vrotr_b(_1, _2); ++} ++// CHECK-LABEL: @vrotr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vrotr_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vrotr_h(_1, _2); ++} ++// CHECK-LABEL: @vrotr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vrotr_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vrotr_w(_1, _2); ++} ++// CHECK-LABEL: @vrotr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vrotr_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vrotr_d(_1, _2); ++} ++// CHECK-LABEL: @vadd_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vadd_q(v2i64 _1, v2i64 _2) { return __builtin_lsx_vadd_q(_1, _2); } ++// CHECK-LABEL: @vsub_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsub_q(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsub_q(_1, _2); } ++// CHECK-LABEL: @vldrepl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vldrepl_b(void *_1) { return __builtin_lsx_vldrepl_b(_1, 1); } ++// CHECK-LABEL: @vldrepl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr [[_1:%.*]], i32 2) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vldrepl_h(void *_1) { return __builtin_lsx_vldrepl_h(_1, 2); } ++// CHECK-LABEL: @vldrepl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr [[_1:%.*]], i32 4) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vldrepl_w(void *_1) { return __builtin_lsx_vldrepl_w(_1, 4); } ++// CHECK-LABEL: @vldrepl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr [[_1:%.*]], i32 8) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vldrepl_d(void *_1) { return __builtin_lsx_vldrepl_d(_1, 8); } ++// CHECK-LABEL: @vmskgez_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 
vmskgez_b(v16i8 _1) { return __builtin_lsx_vmskgez_b(_1); } ++// CHECK-LABEL: @vmsknz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmsknz_b(v16i8 _1) { return __builtin_lsx_vmsknz_b(_1); } ++// CHECK-LABEL: @vexth_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vexth_h_b(v16i8 _1) { return __builtin_lsx_vexth_h_b(_1); } ++// CHECK-LABEL: @vexth_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vexth_w_h(v8i16 _1) { return __builtin_lsx_vexth_w_h(_1); } ++// CHECK-LABEL: @vexth_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vexth_d_w(v4i32 _1) { return __builtin_lsx_vexth_d_w(_1); } ++// CHECK-LABEL: @vexth_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vexth_q_d(v2i64 _1) { return __builtin_lsx_vexth_q_d(_1); } ++// CHECK-LABEL: @vexth_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vexth_hu_bu(v16u8 _1) { return __builtin_lsx_vexth_hu_bu(_1); } ++// CHECK-LABEL: @vexth_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vexth_wu_hu(v8u16 _1) { return __builtin_lsx_vexth_wu_hu(_1); } ++// CHECK-LABEL: @vexth_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vexth_du_wu(v4u32 _1) { return __builtin_lsx_vexth_du_wu(_1); } ++// CHECK-LABEL: @vexth_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vexth_qu_du(v2u64 _1) { return __builtin_lsx_vexth_qu_du(_1); } ++// CHECK-LABEL: @vrotri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vrotri_b(v16i8 _1) { return __builtin_lsx_vrotri_b(_1, 1); } ++// CHECK-LABEL: @vrotri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vrotri_h(v8i16 _1) { return __builtin_lsx_vrotri_h(_1, 1); } ++// CHECK-LABEL: @vrotri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vrotri_w(v4i32 _1) { return __builtin_lsx_vrotri_w(_1, 1); } ++// CHECK-LABEL: @vrotri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> 
[[TMP0]] ++// ++v2i64 vrotri_d(v2i64 _1) { return __builtin_lsx_vrotri_d(_1, 1); } ++// CHECK-LABEL: @vextl_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vextl_q_d(v2i64 _1) { return __builtin_lsx_vextl_q_d(_1); } ++// CHECK-LABEL: @vsrlni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vsrlni_b_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrlni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsrlni_h_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrlni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsrlni_w_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrlni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsrlni_d_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrlrni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vsrlrni_b_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrlrni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsrlrni_h_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrlrni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsrlrni_w_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrlrni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsrlrni_d_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vssrlni_b_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 
vssrlni_h_w(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vssrlni_h_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vssrlni_w_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vssrlni_d_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2) { ++ return __builtin_lsx_vssrlni_bu_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2) { ++ return __builtin_lsx_vssrlni_hu_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2) { ++ return __builtin_lsx_vssrlni_wu_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2) { ++ return __builtin_lsx_vssrlni_du_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlrni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vssrlrni_b_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlrni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vssrlrni_h_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlrni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vssrlrni_w_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlrni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vssrlrni_d_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlrni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> 
@llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2) { ++ return __builtin_lsx_vssrlrni_bu_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlrni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2) { ++ return __builtin_lsx_vssrlrni_hu_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlrni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2) { ++ return __builtin_lsx_vssrlrni_wu_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlrni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2) { ++ return __builtin_lsx_vssrlrni_du_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrani_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrani_b_h(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vsrani_b_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrani_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrani_h_w(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsrani_h_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrani_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrani_w_d(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsrani_w_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrani_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrani_d_q(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsrani_d_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrarni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vsrarni_b_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrarni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsrarni_h_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrarni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsrarni_w_d(_1, _2, 1); ++} ++// CHECK-LABEL: 
@vsrarni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsrarni_d_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrani_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssrani_b_h(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vssrani_b_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrani_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vssrani_h_w(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vssrani_h_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrani_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssrani_w_d(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vssrani_w_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrani_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vssrani_d_q(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vssrani_d_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrani_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2) { ++ return __builtin_lsx_vssrani_bu_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrani_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2) { ++ return __builtin_lsx_vssrani_hu_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrani_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2) { ++ return __builtin_lsx_vssrani_wu_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrani_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vssrani_du_q(v2u64 _1, v2i64 _2) { ++ return __builtin_lsx_vssrani_du_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrarni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vssrarni_b_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrarni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 
vssrarni_h_w(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vssrarni_h_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrarni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vssrarni_w_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrarni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vssrarni_d_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrarni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2) { ++ return __builtin_lsx_vssrarni_bu_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrarni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2) { ++ return __builtin_lsx_vssrarni_hu_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrarni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2) { ++ return __builtin_lsx_vssrarni_wu_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrarni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2) { ++ return __builtin_lsx_vssrarni_du_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vpermi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vpermi_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vpermi_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vld( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vld(ptr [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vld(void *_1) { return __builtin_lsx_vld(_1, 1); } ++// CHECK-LABEL: @vst( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vst(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret void ++// ++void vst(v16i8 _1, void *_2) { return __builtin_lsx_vst(_1, _2, 1); } ++// CHECK-LABEL: @vssrlrn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssrlrn_b_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vssrlrn_b_h(_1, _2); ++} ++// CHECK-LABEL: @vssrlrn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vssrlrn_h_w(v4i32 _1, v4i32 _2) { ++ return 
__builtin_lsx_vssrlrn_h_w(_1, _2); ++} ++// CHECK-LABEL: @vssrlrn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssrlrn_w_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vssrlrn_w_d(_1, _2); ++} ++// CHECK-LABEL: @vssrln_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssrln_b_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vssrln_b_h(_1, _2); ++} ++// CHECK-LABEL: @vssrln_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vssrln_h_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vssrln_h_w(_1, _2); ++} ++// CHECK-LABEL: @vssrln_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssrln_w_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vssrln_w_d(_1, _2); ++} ++// CHECK-LABEL: @vorn_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vorn_v(v16i8 _1, v16i8 _2) { return __builtin_lsx_vorn_v(_1, _2); } ++// CHECK-LABEL: @vldi( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldi(i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vldi() { return __builtin_lsx_vldi(1); } ++// CHECK-LABEL: @vshuf_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) { ++ return __builtin_lsx_vshuf_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vldx( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldx(ptr [[_1:%.*]], i64 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vldx(void *_1) { return __builtin_lsx_vldx(_1, 1); } ++// CHECK-LABEL: @vstx( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstx(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i64 1) ++// CHECK-NEXT: ret void ++// ++void vstx(v16i8 _1, void *_2) { return __builtin_lsx_vstx(_1, _2, 1); } ++// CHECK-LABEL: @vextl_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vextl_qu_du(v2u64 _1) { return __builtin_lsx_vextl_qu_du(_1); } ++// CHECK-LABEL: @bnz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bnz_b(v16u8 _1) { return __builtin_lsx_bnz_b(_1); } ++// CHECK-LABEL: @bnz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bnz_d(v2u64 _1) { return __builtin_lsx_bnz_d(_1); } ++// CHECK-LABEL: @bnz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 
@llvm.loongarch.lsx.bnz.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bnz_h(v8u16 _1) { return __builtin_lsx_bnz_h(_1); } ++// CHECK-LABEL: @bnz_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bnz_v(v16u8 _1) { return __builtin_lsx_bnz_v(_1); } ++// CHECK-LABEL: @bnz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bnz_w(v4u32 _1) { return __builtin_lsx_bnz_w(_1); } ++// CHECK-LABEL: @bz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bz_b(v16u8 _1) { return __builtin_lsx_bz_b(_1); } ++// CHECK-LABEL: @bz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bz_d(v2u64 _1) { return __builtin_lsx_bz_d(_1); } ++// CHECK-LABEL: @bz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bz_h(v8u16 _1) { return __builtin_lsx_bz_h(_1); } ++// CHECK-LABEL: @bz_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bz_v(v16u8 _1) { return __builtin_lsx_bz_v(_1); } ++// CHECK-LABEL: @bz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bz_w(v4u32 _1) { return __builtin_lsx_bz_w(_1); } ++// CHECK-LABEL: @vfcmp_caf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_caf_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_caf_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_caf_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_caf_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_caf_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_ceq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_ceq_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_ceq_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_ceq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_ceq_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_ceq_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_cle_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_cle_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cle_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> 
@llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_cle_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_cle_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_clt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_clt_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_clt_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_clt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_clt_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_clt_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_cne_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_cne_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_cne_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_cne_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cor_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_cor_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_cor_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cor_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_cor_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_cor_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cueq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_cueq_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_cueq_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cueq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_cueq_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_cueq_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cule_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_cule_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_cule_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cule_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_cule_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_cule_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cult_d( ++// CHECK-NEXT: entry: ++// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_cult_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_cult_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cult_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_cult_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_cult_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cun_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_cun_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_cun_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cune_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_cune_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_cune_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cune_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_cune_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_cune_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cun_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_cun_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_cun_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_saf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_saf_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_saf_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_saf_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_saf_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_saf_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_seq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_seq_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_seq_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_seq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_seq_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_seq_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_sle_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_sle_d(_1, _2); ++} ++// CHECK-LABEL: 
@vfcmp_sle_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_sle_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_sle_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_slt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_slt_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_slt_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_slt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_slt_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_slt_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_sne_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_sne_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_sne_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_sne_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sor_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_sor_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_sor_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sor_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_sor_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_sor_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sueq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_sueq_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_sueq_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sueq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_sueq_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_sueq_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sule_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_sule_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_sule_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sule_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_sule_s(v4f32 _1, v4f32 _2) { ++ return 
__builtin_lsx_vfcmp_sule_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sult_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_sult_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_sult_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sult_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_sult_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_sult_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sun_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_sun_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_sun_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sune_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_sune_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_sune_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sune_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_sune_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_sune_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sun_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_sun_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_sun_s(_1, _2); ++} ++// CHECK-LABEL: @vrepli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vrepli_b() { return __builtin_lsx_vrepli_b(1); } ++// CHECK-LABEL: @vrepli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vrepli_d() { return __builtin_lsx_vrepli_d(1); } ++// CHECK-LABEL: @vrepli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vrepli_h() { return __builtin_lsx_vrepli_h(1); } ++// CHECK-LABEL: @vrepli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vrepli_w() { return __builtin_lsx_vrepli_w(1); } +-- +2.20.1 + diff --git a/0017-LoongArch-Enable-128-bits-vector-by-default-100056.patch b/0017-LoongArch-Enable-128-bits-vector-by-default-100056.patch new file mode 100644 index 0000000..2bea874 --- /dev/null +++ b/0017-LoongArch-Enable-128-bits-vector-by-default-100056.patch @@ -0,0 +1,284 @@ +From 4be89899fcc08eee5dee29636f3a5563a42daf67 Mon Sep 17 00:00:00 2001 +From: Ami-zhang +Date: Tue, 23 Jul 2024 14:02:04 +0800 +Subject: [PATCH 17/23] [LoongArch] Enable 128-bits vector by default (#100056) + +This commit is to enable 128 vector feature by default, in order to 
be +consistent with gcc. + +(cherry picked from commit b4ef0ba244899a64a1b1e6448eca942cfa5eda18) +--- + .../lib/Driver/ToolChains/Arch/LoongArch.cpp | 76 +++++++++++-------- + .../test/Driver/loongarch-default-features.c | 2 +- + clang/test/Driver/loongarch-mlasx.c | 6 +- + clang/test/Driver/loongarch-msimd.c | 4 +- + clang/test/Driver/loongarch-msingle-float.c | 4 +- + clang/test/Driver/loongarch-msoft-float.c | 4 +- + clang/test/Preprocessor/init-loongarch.c | 8 +- + 7 files changed, 60 insertions(+), 44 deletions(-) + +diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp +index 8b3d2837a4e5..87d7b30ef5d3 100644 +--- a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp ++++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp +@@ -127,6 +127,11 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D, + const llvm::Triple &Triple, + const ArgList &Args, + std::vector &Features) { ++ // Enable the `lsx` feature on 64-bit LoongArch by default. ++ if (Triple.isLoongArch64() && ++ (!Args.hasArgNoClaim(clang::driver::options::OPT_march_EQ))) ++ Features.push_back("+lsx"); ++ + std::string ArchName; + if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) + ArchName = A->getValue(); +@@ -145,9 +150,11 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D, + } else if (A->getOption().matches(options::OPT_msingle_float)) { + Features.push_back("+f"); + Features.push_back("-d"); ++ Features.push_back("-lsx"); + } else /*Soft-float*/ { + Features.push_back("-f"); + Features.push_back("-d"); ++ Features.push_back("-lsx"); + } + } else if (const Arg *A = Args.getLastArg(options::OPT_mfpu_EQ)) { + StringRef FPU = A->getValue(); +@@ -157,9 +164,11 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D, + } else if (FPU == "32") { + Features.push_back("+f"); + Features.push_back("-d"); ++ Features.push_back("-lsx"); + } else if (FPU == "0" || FPU == "none") { + Features.push_back("-f"); + Features.push_back("-d"); ++ Features.push_back("-lsx"); + } else { + D.Diag(diag::err_drv_loongarch_invalid_mfpu_EQ) << FPU; + } +@@ -175,6 +184,42 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D, + A->ignoreTargetSpecific(); + if (Arg *A = Args.getLastArgNoClaim(options::OPT_mfpu_EQ)) + A->ignoreTargetSpecific(); ++ if (Arg *A = Args.getLastArgNoClaim(options::OPT_msimd_EQ)) ++ A->ignoreTargetSpecific(); ++ ++ // Select lsx/lasx feature determined by -msimd=. ++ // Option -msimd= precedes -m[no-]lsx and -m[no-]lasx. ++ if (const Arg *A = Args.getLastArg(options::OPT_msimd_EQ)) { ++ StringRef MSIMD = A->getValue(); ++ if (MSIMD == "lsx") { ++ // Option -msimd=lsx depends on 64-bit FPU. ++ // -m*-float and -mfpu=none/0/32 conflict with -msimd=lsx. ++ if (llvm::find(Features, "-d") != Features.end()) ++ D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lsx); ++ else ++ Features.push_back("+lsx"); ++ } else if (MSIMD == "lasx") { ++ // Option -msimd=lasx depends on 64-bit FPU and LSX. ++ // -m*-float, -mfpu=none/0/32 and -mno-lsx conflict with -msimd=lasx. ++ if (llvm::find(Features, "-d") != Features.end()) ++ D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lasx); ++ else if (llvm::find(Features, "-lsx") != Features.end()) ++ D.Diag(diag::err_drv_loongarch_invalid_simd_option_combination); ++ ++ // The command options do not contain -mno-lasx. 
++ if (!Args.getLastArg(options::OPT_mno_lasx)) { ++ Features.push_back("+lsx"); ++ Features.push_back("+lasx"); ++ } ++ } else if (MSIMD == "none") { ++ if (llvm::find(Features, "+lsx") != Features.end()) ++ Features.push_back("-lsx"); ++ if (llvm::find(Features, "+lasx") != Features.end()) ++ Features.push_back("-lasx"); ++ } else { ++ D.Diag(diag::err_drv_loongarch_invalid_msimd_EQ) << MSIMD; ++ } ++ } + + // Select lsx feature determined by -m[no-]lsx. + if (const Arg *A = Args.getLastArg(options::OPT_mlsx, options::OPT_mno_lsx)) { +@@ -198,8 +243,6 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D, + if (A->getOption().matches(options::OPT_mlasx)) { + if (llvm::find(Features, "-d") != Features.end()) + D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lasx); +- else if (llvm::find(Features, "-lsx") != Features.end()) +- D.Diag(diag::err_drv_loongarch_invalid_simd_option_combination); + else { /*-mlasx*/ + Features.push_back("+lsx"); + Features.push_back("+lasx"); +@@ -207,35 +250,6 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D, + } else /*-mno-lasx*/ + Features.push_back("-lasx"); + } +- +- // Select lsx/lasx feature determined by -msimd=. +- // Option -msimd= has lower priority than -m[no-]lsx and -m[no-]lasx. +- if (const Arg *A = Args.getLastArg(options::OPT_msimd_EQ)) { +- StringRef MSIMD = A->getValue(); +- if (MSIMD == "lsx") { +- // Option -msimd=lsx depends on 64-bit FPU. +- // -m*-float and -mfpu=none/0/32 conflict with -mlsx. +- if (llvm::find(Features, "-d") != Features.end()) +- D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lsx); +- // The previous option does not contain feature -lsx. +- else if (llvm::find(Features, "-lsx") == Features.end()) +- Features.push_back("+lsx"); +- } else if (MSIMD == "lasx") { +- // Option -msimd=lasx depends on 64-bit FPU and LSX. +- // -m*-float and -mfpu=none/0/32 conflict with -mlsx. +- if (llvm::find(Features, "-d") != Features.end()) +- D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lasx); +- else if (llvm::find(Features, "-lsx") != Features.end()) +- D.Diag(diag::err_drv_loongarch_invalid_simd_option_combination); +- // The previous option does not contain feature -lasx. 
+- else if (llvm::find(Features, "-lasx") == Features.end()) { +- Features.push_back("+lsx"); +- Features.push_back("+lasx"); +- } +- } else if (MSIMD != "none") { +- D.Diag(diag::err_drv_loongarch_invalid_msimd_EQ) << MSIMD; +- } +- } + } + + std::string loongarch::postProcessTargetCPUString(const std::string &CPU, +diff --git a/clang/test/Driver/loongarch-default-features.c b/clang/test/Driver/loongarch-default-features.c +index 3cdf3ba3d23e..90634bbcf003 100644 +--- a/clang/test/Driver/loongarch-default-features.c ++++ b/clang/test/Driver/loongarch-default-features.c +@@ -2,7 +2,7 @@ + // RUN: %clang --target=loongarch64 -S -emit-llvm %s -o - | FileCheck %s --check-prefix=LA64 + + // LA32: "target-features"="+32bit" +-// LA64: "target-features"="+64bit,+d,+f,+ual" ++// LA64: "target-features"="+64bit,+d,+f,+lsx,+ual" + + int foo(void) { + return 3; +diff --git a/clang/test/Driver/loongarch-mlasx.c b/clang/test/Driver/loongarch-mlasx.c +index 0b934f125c9e..87634ff5a9a4 100644 +--- a/clang/test/Driver/loongarch-mlasx.c ++++ b/clang/test/Driver/loongarch-mlasx.c +@@ -5,7 +5,7 @@ + // RUN: %clang --target=loongarch64 -mno-lasx -fsyntax-only %s -### 2>&1 | \ + // RUN: FileCheck %s --check-prefix=CC1-NOLASX + // RUN: %clang --target=loongarch64 -mlasx -mno-lasx -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1-NOLASX ++// RUN: FileCheck %s --check-prefix=CC1-LSX + // RUN: %clang --target=loongarch64 -mno-lasx -mlasx -fsyntax-only %s -### 2>&1 | \ + // RUN: FileCheck %s --check-prefix=CC1-LASX + // RUN: %clang --target=loongarch64 -mlsx -mlasx -fsyntax-only %s -### 2>&1 | \ +@@ -18,7 +18,7 @@ + // RUN: %clang --target=loongarch64 -mno-lasx -S -emit-llvm %s -o - | \ + // RUN: FileCheck %s --check-prefix=IR-NOLASX + // RUN: %clang --target=loongarch64 -mlasx -mno-lasx -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IR-NOLASX ++// RUN: FileCheck %s --check-prefix=IR-LSX + // RUN: %clang --target=loongarch64 -mno-lasx -mlasx -S -emit-llvm %s -o - | \ + // RUN: FileCheck %s --check-prefix=IR-LASX + // RUN: %clang --target=loongarch64 -mlsx -mlasx -S -emit-llvm %s -o - | \ +@@ -26,9 +26,11 @@ + // RUN: %clang --target=loongarch64 -mlasx -mlsx -S -emit-llvm %s -o - | \ + // RUN: FileCheck %s --check-prefix=IR-LASX + ++// CC1-LSX: "-target-feature" "+lsx" + // CC1-LASX: "-target-feature" "+lsx" "-target-feature" "+lasx" + // CC1-NOLASX: "-target-feature" "-lasx" + ++// IR-LSX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+lsx{{(,.*)?}}" + // IR-LASX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+lasx{{(,.*)?}}" + // IR-NOLASX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-lasx{{(,.*)?}}" + +diff --git a/clang/test/Driver/loongarch-msimd.c b/clang/test/Driver/loongarch-msimd.c +index cd463300c874..49d298e1b2e3 100644 +--- a/clang/test/Driver/loongarch-msimd.c ++++ b/clang/test/Driver/loongarch-msimd.c +@@ -75,9 +75,9 @@ + // RUN: FileCheck %s --check-prefixes=LSX,LASX + + // RUN: %clang --target=loongarch64 -mlasx -mno-lasx -msimd=lasx -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX ++// RUN: FileCheck %s --check-prefixes=LSX,NOLASX + // RUN: %clang --target=loongarch64 -mno-lasx -msimd=lasx -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX ++// RUN: FileCheck %s --check-prefixes=LSX,NOLASX + + // RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mlsx -msimd=lasx -fsyntax-only %s -### 2>&1 | \ + // RUN: FileCheck %s --check-prefixes=LSX,NOLASX +diff --git 
a/clang/test/Driver/loongarch-msingle-float.c b/clang/test/Driver/loongarch-msingle-float.c +index bd9b3e8a8c01..4eb0865b53a5 100644 +--- a/clang/test/Driver/loongarch-msingle-float.c ++++ b/clang/test/Driver/loongarch-msingle-float.c +@@ -11,10 +11,10 @@ + // WARN: warning: ignoring '-mabi=lp64s' as it conflicts with that implied by '-msingle-float' (lp64f) + // WARN: warning: ignoring '-mfpu=64' as it conflicts with that implied by '-msingle-float' (32) + +-// CC1: "-target-feature" "+f"{{.*}} "-target-feature" "-d" ++// CC1: "-target-feature" "+f"{{.*}} "-target-feature" "-d" "-target-feature" "-lsx" + // CC1: "-target-abi" "lp64f" + +-// IR: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+f,{{(.*,)?}}-d" ++// IR: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+f,{{(.*,)?}}-d,-lsx" + + int foo(void) { + return 3; +diff --git a/clang/test/Driver/loongarch-msoft-float.c b/clang/test/Driver/loongarch-msoft-float.c +index 0e5121ac84b4..ebf27fb00e30 100644 +--- a/clang/test/Driver/loongarch-msoft-float.c ++++ b/clang/test/Driver/loongarch-msoft-float.c +@@ -11,10 +11,10 @@ + // WARN: warning: ignoring '-mabi=lp64d' as it conflicts with that implied by '-msoft-float' (lp64s) + // WARN: warning: ignoring '-mfpu=64' as it conflicts with that implied by '-msoft-float' (0) + +-// CC1: "-target-feature" "-f"{{.*}} "-target-feature" "-d" ++// CC1: "-target-feature" "-f"{{.*}} "-target-feature" "-d" "-target-feature" "-lsx" + // CC1: "-target-abi" "lp64s" + +-// IR: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-d,{{(.*,)?}}-f{{(,.*)?}}" ++// IR: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-d,{{(.*,)?}}-f,-lsx" + + int foo(void) { + return 3; +diff --git a/clang/test/Preprocessor/init-loongarch.c b/clang/test/Preprocessor/init-loongarch.c +index 154ad82e0f8c..635d029ce9d3 100644 +--- a/clang/test/Preprocessor/init-loongarch.c ++++ b/clang/test/Preprocessor/init-loongarch.c +@@ -814,6 +814,8 @@ + // RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s + // RUN: %clang --target=loongarch64 -mlsx -mno-lasx -x c -E -dM %s -o - \ + // RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s ++// RUN: %clang --target=loongarch64 -mno-lasx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s + // RUN: %clang --target=loongarch64 -mno-lasx -mlsx -x c -E -dM %s -o - \ + // RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s + // MLSX-NOT: #define __loongarch_asx +@@ -822,12 +824,12 @@ + + // RUN: %clang --target=loongarch64 -mlasx -x c -E -dM %s -o - \ + // RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s +-// RUN: %clang --target=loongarch64 -mno-lasx -mlasx -x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s + // RUN: %clang --target=loongarch64 -mlsx -mlasx -x c -E -dM %s -o - \ + // RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s + // RUN: %clang --target=loongarch64 -mlasx -mlsx -x c -E -dM %s -o - \ + // RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s ++// RUN: %clang --target=loongarch64 -mno-lasx -mlasx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s + // MLASX: #define __loongarch_asx 1 + // MLASX: #define __loongarch_simd_width 256 + // MLASX: #define __loongarch_sx 1 +@@ -840,8 +842,6 @@ + // RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s + // RUN: %clang --target=loongarch64 -mno-lasx -mno-lsx -x c -E -dM %s -o - \ + // RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s +-// RUN: 
%clang --target=loongarch64 -mno-lasx -x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s + // MNO-LSX-NOT: #define __loongarch_asx + // MNO-LSX-NOT: #define __loongarch_simd_width + // MNO-LSX-NOT: #define __loongarch_sx +-- +2.20.1 + diff --git a/0017-LoongArch-Fix-test-cases-after-2dd8460d8a36.patch b/0017-LoongArch-Fix-test-cases-after-2dd8460d8a36.patch deleted file mode 100644 index ba66cf1..0000000 --- a/0017-LoongArch-Fix-test-cases-after-2dd8460d8a36.patch +++ /dev/null @@ -1,60 +0,0 @@ -From aeaf23493c46e94773dcc47c8df52ceaea5bfa36 Mon Sep 17 00:00:00 2001 -From: chenli -Date: Tue, 10 Jun 2025 09:44:02 +0800 -Subject: [PATCH 3/3] [LoongArch] Fix test cases after 2dd8460d8a36 - ---- - llvm/test/CodeGen/LoongArch/shrinkwrap.ll | 4 +--- - llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll | 12 ++++++------ - 2 files changed, 7 insertions(+), 9 deletions(-) - -diff --git a/llvm/test/CodeGen/LoongArch/shrinkwrap.ll b/llvm/test/CodeGen/LoongArch/shrinkwrap.ll -index 5f15dd2e7eaf..0323b56080f8 100644 ---- a/llvm/test/CodeGen/LoongArch/shrinkwrap.ll -+++ b/llvm/test/CodeGen/LoongArch/shrinkwrap.ll -@@ -9,7 +9,6 @@ define void @eliminate_restore(i32 %n) nounwind { - ; NOSHRINKW: # %bb.0: - ; NOSHRINKW-NEXT: addi.d $sp, $sp, -16 - ; NOSHRINKW-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill --; NOSHRINKW-NEXT: # kill: def $r5 killed $r4 - ; NOSHRINKW-NEXT: addi.w $a1, $a0, 0 - ; NOSHRINKW-NEXT: ori $a0, $zero, 32 - ; NOSHRINKW-NEXT: bltu $a0, $a1, .LBB0_2 -@@ -52,9 +51,8 @@ define void @conditional_alloca(i32 %n) nounwind { - ; NOSHRINKW-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill - ; NOSHRINKW-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill - ; NOSHRINKW-NEXT: addi.d $fp, $sp, 32 --; NOSHRINKW-NEXT: move $a1, $a0 --; NOSHRINKW-NEXT: st.d $a1, $fp, -24 # 8-byte Folded Spill - ; NOSHRINKW-NEXT: addi.w $a1, $a0, 0 -+; NOSHRINKW-NEXT: st.d $a0, $fp, -24 # 8-byte Folded Spill - ; NOSHRINKW-NEXT: ori $a0, $zero, 32 - ; NOSHRINKW-NEXT: bltu $a0, $a1, .LBB1_2 - ; NOSHRINKW-NEXT: b .LBB1_1 -diff --git a/llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll b/llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll -index ff242c2a0f98..6fba703018f0 100644 ---- a/llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll -+++ b/llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll -@@ -19,13 +19,13 @@ define dso_local ptr @f(i32 noundef signext %i) "frame-pointer"="all" { - ; CHECK-NEXT: addi.d $fp, $sp, 48 - ; CHECK-NEXT: .cfi_def_cfa 22, 0 - ; CHECK-NEXT: st.d $ra, $fp, -40 # 8-byte Folded Spill --; CHECK-NEXT: move $a1, $a0 -+; CHECK-NEXT: # kill: def $r5 killed $r4 -+; CHECK-NEXT: st.w $a0, $fp, -28 - ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(calls) --; CHECK-NEXT: addi.d $a3, $a0, %pc_lo12(calls) --; CHECK-NEXT: ld.w $a0, $a3, 0 --; CHECK-NEXT: addi.d $a2, $a0, 1 --; CHECK-NEXT: st.w $a2, $a3, 0 --; CHECK-NEXT: st.w $a1, $fp, -28 -+; CHECK-NEXT: addi.d $a2, $a0, %pc_lo12(calls) -+; CHECK-NEXT: ld.w $a0, $a2, 0 -+; CHECK-NEXT: addi.d $a1, $a0, 1 -+; CHECK-NEXT: st.w $a1, $a2, 0 - ; CHECK-NEXT: bnez $a0, .LBB0_2 - ; CHECK-NEXT: b .LBB0_1 - ; CHECK-NEXT: .LBB0_1: # %if.then --- -2.20.1 - diff --git a/0017-LoongArch-Set-isBarrier-to-true-for-instruction-b-72.patch b/0017-LoongArch-Set-isBarrier-to-true-for-instruction-b-72.patch new file mode 100644 index 0000000..6d2b6c9 --- /dev/null +++ b/0017-LoongArch-Set-isBarrier-to-true-for-instruction-b-72.patch @@ -0,0 +1,57 @@ +From 009145168af549b02d81777430bb4ed7cea3d1f5 Mon Sep 17 00:00:00 2001 +From: ZhaoQi +Date: Thu, 16 Nov 2023 
14:01:58 +0800 +Subject: [PATCH 17/27] [LoongArch] Set isBarrier to true for instruction 'b' + (#72339) + +Instr "b offs26" represent to an unconditional branch in LoongArch. Set +isBarrier to 1 in tablegen for it, so that MCInstrAnalysis can return +correctly. + +Fixes https://github.com/llvm/llvm-project/pull/71903. + +(cherry picked from commit 42a4d5e8cab1537515d92ed56d6e17b673ed352f) +--- + llvm/lib/Target/LoongArch/LoongArchInstrInfo.td | 1 + + llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp | 8 ++------ + 2 files changed, 3 insertions(+), 6 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +index 166379d7d592..05ae36a9781d 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +@@ -586,6 +586,7 @@ class Br_I26 op> + : FmtI26 { + let isBranch = 1; + let isTerminator = 1; ++ let isBarrier = 1; + } + } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 + +diff --git a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp +index 6e1919fc2261..468ee79615d6 100644 +--- a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp ++++ b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp +@@ -94,17 +94,13 @@ TEST_P(InstrAnalysisTest, IsBranch) { + + TEST_P(InstrAnalysisTest, IsConditionalBranch) { + EXPECT_TRUE(Analysis->isConditionalBranch(beq())); +- // FIXME: Instr 'b' is not a ConditionalBranch, so the analysis here is +- // wrong. The following patch will fix it. +- EXPECT_TRUE(Analysis->isConditionalBranch(b())); ++ EXPECT_FALSE(Analysis->isConditionalBranch(b())); + EXPECT_FALSE(Analysis->isConditionalBranch(bl())); + } + + TEST_P(InstrAnalysisTest, IsUnconditionalBranch) { + EXPECT_FALSE(Analysis->isUnconditionalBranch(beq())); +- // FIXME: Instr 'b' is an UnconditionalBranch, so the analysis here is +- // wrong. The following patch will fix it. +- EXPECT_FALSE(Analysis->isUnconditionalBranch(b())); ++ EXPECT_TRUE(Analysis->isUnconditionalBranch(b())); + EXPECT_FALSE(Analysis->isUnconditionalBranch(bl())); + EXPECT_TRUE(Analysis->isUnconditionalBranch(jirl(LoongArch::R0))); + EXPECT_FALSE(Analysis->isUnconditionalBranch(jirl(LoongArch::R1))); +-- +2.20.1 + diff --git a/0018-LoongArch-Add-LoongArch-V1.1-instructions-definition.patch b/0018-LoongArch-Add-LoongArch-V1.1-instructions-definition.patch new file mode 100644 index 0000000..fab1598 --- /dev/null +++ b/0018-LoongArch-Add-LoongArch-V1.1-instructions-definition.patch @@ -0,0 +1,432 @@ +From 2d97c967d3ee5c978cd6e2267c5d73dd0df68cff Mon Sep 17 00:00:00 2001 +From: Ami-zhang +Date: Wed, 17 Jan 2024 11:15:05 +0800 +Subject: [PATCH 18/23] [LoongArch] Add LoongArch V1.1 instructions definitions + and MC tests (#78238) + +LoongArch V1.1 instrucions include floating-point approximate reciprocal +instructions and atomic instrucions. And add testcases for these +instrucions meanwhile. 
+ +(cherry picked from commit 84bdee2875da364be7eb2144b1ae530f6a05f0e2) +--- + .../LoongArch/LoongArchFloat32InstrInfo.td | 2 + + .../LoongArch/LoongArchFloat64InstrInfo.td | 2 + + .../Target/LoongArch/LoongArchInstrInfo.td | 34 ++++++- + .../LoongArch/LoongArchLASXInstrInfo.td | 4 + + .../Target/LoongArch/LoongArchLSXInstrInfo.td | 4 + + llvm/test/MC/LoongArch/Basic/Float/d-arith.s | 8 ++ + llvm/test/MC/LoongArch/Basic/Float/f-arith.s | 8 ++ + llvm/test/MC/LoongArch/Basic/Integer/atomic.s | 92 +++++++++++++++++++ + llvm/test/MC/LoongArch/lasx/frecip.s | 8 ++ + llvm/test/MC/LoongArch/lasx/frsqrt.s | 8 ++ + llvm/test/MC/LoongArch/lsx/frecip.s | 8 ++ + llvm/test/MC/LoongArch/lsx/frsqrt.s | 8 ++ + 12 files changed, 184 insertions(+), 2 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td +index 65120c083f49..f30837912e75 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td +@@ -50,6 +50,8 @@ def FNEG_S : FP_ALU_2R<0x01141400>; + def FSQRT_S : FP_ALU_2R<0x01144400>; + def FRECIP_S : FP_ALU_2R<0x01145400>; + def FRSQRT_S : FP_ALU_2R<0x01146400>; ++def FRECIPE_S : FP_ALU_2R<0x01147400>; ++def FRSQRTE_S : FP_ALU_2R<0x01148400>; + def FSCALEB_S : FP_ALU_3R<0x01108000>; + def FLOGB_S : FP_ALU_2R<0x01142400>; + def FCOPYSIGN_S : FP_ALU_3R<0x01128000>; +diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td +index 437c1e4d7be2..0ea4c564b045 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td +@@ -34,6 +34,8 @@ def FNEG_D : FP_ALU_2R<0x01141800, FPR64>; + def FSQRT_D : FP_ALU_2R<0x01144800, FPR64>; + def FRECIP_D : FP_ALU_2R<0x01145800, FPR64>; + def FRSQRT_D : FP_ALU_2R<0x01146800, FPR64>; ++def FRECIPE_D : FP_ALU_2R<0x01147800, FPR64>; ++def FRSQRTE_D : FP_ALU_2R<0x01148800, FPR64>; + def FSCALEB_D : FP_ALU_3R<0x01110000, FPR64>; + def FLOGB_D : FP_ALU_2R<0x01142800, FPR64>; + def FCOPYSIGN_D : FP_ALU_3R<0x01130000, FPR64>; +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +index ecd0c2b71b85..756c460f916b 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +@@ -634,15 +634,24 @@ class AM_3R op> + : Fmt3R; + +-let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in ++let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in { + class LLBase op> + : Fmt2RI14; ++class LLBase_ACQ op> ++ : Fmt2R; ++} + +-let hasSideEffects = 0, mayLoad = 0, mayStore = 1, Constraints = "$rd = $dst" in ++let hasSideEffects = 0, mayLoad = 0, mayStore = 1, Constraints = "$rd = $dst" in { + class SCBase op> + : Fmt2RI14; ++class SCBase_128 op> ++ : Fmt3R; ++class SCBase_REL op> ++ : Fmt2R; ++} + + let hasSideEffects = 1 in + class IOCSRRD op> +@@ -754,6 +763,8 @@ def PRELD : FmtPRELD<(outs), (ins uimm5:$imm5, GPR:$rj, simm12:$imm12), + // Atomic Memory Access Instructions + def LL_W : LLBase<0x20000000>; + def SC_W : SCBase<0x21000000>; ++def LLACQ_W : LLBase_ACQ<0x38578000>; ++def SCREL_W : SCBase_REL<0x38578400>; + + // Barrier Instructions + def DBAR : MISC_I15<0x38720000>; +@@ -875,8 +886,12 @@ def STLE_W : STORE_3R<0x387f0000>; + def STLE_D : STORE_3R<0x387f8000>; + + // Atomic Memory Access Instructions for 64-bits ++def AMSWAP_B : AM_3R<0x385c0000>; ++def AMSWAP_H : AM_3R<0x385c8000>; + def 
AMSWAP_W : AM_3R<0x38600000>; + def AMSWAP_D : AM_3R<0x38608000>; ++def AMADD_B : AM_3R<0x385d0000>; ++def AMADD_H : AM_3R<0x385d8000>; + def AMADD_W : AM_3R<0x38610000>; + def AMADD_D : AM_3R<0x38618000>; + def AMAND_W : AM_3R<0x38620000>; +@@ -893,8 +908,12 @@ def AMMAX_WU : AM_3R<0x38670000>; + def AMMAX_DU : AM_3R<0x38678000>; + def AMMIN_WU : AM_3R<0x38680000>; + def AMMIN_DU : AM_3R<0x38688000>; ++def AMSWAP__DB_B : AM_3R<0x385e0000>; ++def AMSWAP__DB_H : AM_3R<0x385e8000>; + def AMSWAP__DB_W : AM_3R<0x38690000>; + def AMSWAP__DB_D : AM_3R<0x38698000>; ++def AMADD__DB_B : AM_3R<0x385f0000>; ++def AMADD__DB_H : AM_3R<0x385f8000>; + def AMADD__DB_W : AM_3R<0x386a0000>; + def AMADD__DB_D : AM_3R<0x386a8000>; + def AMAND__DB_W : AM_3R<0x386b0000>; +@@ -911,8 +930,19 @@ def AMMAX__DB_WU : AM_3R<0x38700000>; + def AMMAX__DB_DU : AM_3R<0x38708000>; + def AMMIN__DB_WU : AM_3R<0x38710000>; + def AMMIN__DB_DU : AM_3R<0x38718000>; ++def AMCAS_B : AM_3R<0x38580000>; ++def AMCAS_H : AM_3R<0x38588000>; ++def AMCAS_W : AM_3R<0x38590000>; ++def AMCAS_D : AM_3R<0x38598000>; ++def AMCAS__DB_B : AM_3R<0x385a0000>; ++def AMCAS__DB_H : AM_3R<0x385a8000>; ++def AMCAS__DB_W : AM_3R<0x385b0000>; ++def AMCAS__DB_D : AM_3R<0x385b8000>; + def LL_D : LLBase<0x22000000>; + def SC_D : SCBase<0x23000000>; ++def SC_Q : SCBase_128<0x38570000>; ++def LLACQ_D : LLBase_ACQ<0x38578800>; ++def SCREL_D : SCBase_REL<0x38578C00>; + + // CRC Check Instructions + def CRC_W_B_W : ALU_3R<0x00240000>; +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index 5b6721cdf1b4..454915ac8c0a 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -773,6 +773,10 @@ def XVFRECIP_S : LASX2R_XX<0x769cf400>; + def XVFRECIP_D : LASX2R_XX<0x769cf800>; + def XVFRSQRT_S : LASX2R_XX<0x769d0400>; + def XVFRSQRT_D : LASX2R_XX<0x769d0800>; ++def XVFRECIPE_S : LASX2R_XX<0x769d1400>; ++def XVFRECIPE_D : LASX2R_XX<0x769d1800>; ++def XVFRSQRTE_S : LASX2R_XX<0x769d2400>; ++def XVFRSQRTE_D : LASX2R_XX<0x769d2800>; + + def XVFCVTL_S_H : LASX2R_XX<0x769de800>; + def XVFCVTH_S_H : LASX2R_XX<0x769dec00>; +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index 3519fa3142c3..6d60d7074ec3 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -918,6 +918,10 @@ def VFRECIP_S : LSX2R_VV<0x729cf400>; + def VFRECIP_D : LSX2R_VV<0x729cf800>; + def VFRSQRT_S : LSX2R_VV<0x729d0400>; + def VFRSQRT_D : LSX2R_VV<0x729d0800>; ++def VFRECIPE_S : LSX2R_VV<0x729d1400>; ++def VFRECIPE_D : LSX2R_VV<0x729d1800>; ++def VFRSQRTE_S : LSX2R_VV<0x729d2400>; ++def VFRSQRTE_D : LSX2R_VV<0x729d2800>; + + def VFCVTL_S_H : LSX2R_VV<0x729de800>; + def VFCVTH_S_H : LSX2R_VV<0x729dec00>; +diff --git a/llvm/test/MC/LoongArch/Basic/Float/d-arith.s b/llvm/test/MC/LoongArch/Basic/Float/d-arith.s +index 6b2c67e9a2cc..8e19d2e34f3c 100644 +--- a/llvm/test/MC/LoongArch/Basic/Float/d-arith.s ++++ b/llvm/test/MC/LoongArch/Basic/Float/d-arith.s +@@ -78,10 +78,18 @@ fsqrt.d $fa2, $ft3 + # ASM: encoding: [0x7b,0x5b,0x14,0x01] + frecip.d $fs3, $fs3 + ++# ASM-AND-OBJ: frecipe.d $fa0, $fa0 ++# ASM: encoding: [0x00,0x78,0x14,0x01] ++frecipe.d $fa0, $fa0 ++ + # ASM-AND-OBJ: frsqrt.d $ft14, $fa3 + # ASM: encoding: [0x76,0x68,0x14,0x01] + frsqrt.d $ft14, $fa3 + ++# ASM-AND-OBJ: frsqrte.d $fa1, $fa1 ++# ASM: encoding: 
[0x21,0x88,0x14,0x01] ++frsqrte.d $fa1, $fa1 ++ + # ASM-AND-OBJ: fscaleb.d $ft4, $ft6, $fs2 + # ASM: encoding: [0xcc,0x69,0x11,0x01] + fscaleb.d $ft4, $ft6, $fs2 +diff --git a/llvm/test/MC/LoongArch/Basic/Float/f-arith.s b/llvm/test/MC/LoongArch/Basic/Float/f-arith.s +index 155e783cf435..c32151adbf3b 100644 +--- a/llvm/test/MC/LoongArch/Basic/Float/f-arith.s ++++ b/llvm/test/MC/LoongArch/Basic/Float/f-arith.s +@@ -73,10 +73,18 @@ fsqrt.s $fs3, $ft10 + # ASM: encoding: [0x71,0x57,0x14,0x01] + frecip.s $ft9, $fs3 + ++# ASM-AND-OBJ: frecipe.s $fa0, $fa0 ++# ASM: encoding: [0x00,0x74,0x14,0x01] ++frecipe.s $fa0, $fa0 ++ + # ASM-AND-OBJ: frsqrt.s $fs1, $ft4 + # ASM: encoding: [0x99,0x65,0x14,0x01] + frsqrt.s $fs1, $ft4 + ++# ASM-AND-OBJ: frsqrte.s $fa1, $fa1 ++# ASM: encoding: [0x21,0x84,0x14,0x01] ++frsqrte.s $fa1, $fa1 ++ + # ASM-AND-OBJ: fscaleb.s $ft13, $ft15, $fa6 + # ASM: encoding: [0xf5,0x9a,0x10,0x01] + fscaleb.s $ft13, $ft15, $fa6 +diff --git a/llvm/test/MC/LoongArch/Basic/Integer/atomic.s b/llvm/test/MC/LoongArch/Basic/Integer/atomic.s +index a35211db8851..69acdeef935c 100644 +--- a/llvm/test/MC/LoongArch/Basic/Integer/atomic.s ++++ b/llvm/test/MC/LoongArch/Basic/Integer/atomic.s +@@ -21,6 +21,14 @@ ll.w $tp, $s4, 220 + # CHECK-ASM: encoding: [0xd3,0x39,0x00,0x21] + sc.w $t7, $t2, 56 + ++# CHECK-ASM-AND-OBJ: llacq.w $t1, $t2 ++# CHECK-ASM: encoding: [0xcd,0x81,0x57,0x38] ++llacq.w $t1, $t2 ++ ++# CHECK-ASM-AND-OBJ: screl.w $t1, $t2 ++# CHECK-ASM: encoding: [0xcd,0x85,0x57,0x38] ++screl.w $t1, $t2 ++ + + + ############################################################# +@@ -29,6 +37,14 @@ sc.w $t7, $t2, 56 + + .ifdef LA64 + ++# CHECK64-ASM-AND-OBJ: amswap.b $a2, $t0, $s1 ++# CHECK64-ASM: encoding: [0x06,0x33,0x5c,0x38] ++amswap.b $a2, $t0, $s1, 0 ++ ++# CHECK64-ASM-AND-OBJ: amswap.h $a2, $t0, $s1 ++# CHECK64-ASM: encoding: [0x06,0xb3,0x5c,0x38] ++amswap.h $a2, $t0, $s1, 0 ++ + # CHECK64-ASM-AND-OBJ: amswap.w $a2, $t0, $s1 + # CHECK64-ASM: encoding: [0x06,0x33,0x60,0x38] + amswap.w $a2, $t0, $s1, 0 +@@ -41,6 +57,14 @@ amswap.w $zero, $t0, $zero + # CHECK64-ASM: encoding: [0xa0,0x00,0x6a,0x38] + amadd_db.w $zero, $zero, $a1 + ++# CHECK64-ASM-AND-OBJ: amswap.b $a2, $t0, $s1 ++# CHECK64-ASM: encoding: [0x06,0x33,0x5c,0x38] ++amswap.b $a2, $t0, $s1 ++ ++# CHECK64-ASM-AND-OBJ: amswap.h $a2, $t0, $s1 ++# CHECK64-ASM: encoding: [0x06,0xb3,0x5c,0x38] ++amswap.h $a2, $t0, $s1 ++ + # CHECK64-ASM-AND-OBJ: amswap.w $a2, $t0, $s1 + # CHECK64-ASM: encoding: [0x06,0x33,0x60,0x38] + amswap.w $a2, $t0, $s1 +@@ -49,6 +73,14 @@ amswap.w $a2, $t0, $s1 + # CHECK64-ASM: encoding: [0xc2,0xba,0x60,0x38] + amswap.d $tp, $t2, $fp + ++# CHECK64-ASM-AND-OBJ: amadd.b $a4, $t0, $r21 ++# CHECK64-ASM: encoding: [0xa8,0x32,0x5d,0x38] ++amadd.b $a4, $t0, $r21 ++ ++# CHECK64-ASM-AND-OBJ: amadd.h $a1, $t5, $s6 ++# CHECK64-ASM: encoding: [0xa5,0xc7,0x5d,0x38] ++amadd.h $a1, $t5, $s6 ++ + # CHECK64-ASM-AND-OBJ: amadd.w $a4, $t0, $r21 + # CHECK64-ASM: encoding: [0xa8,0x32,0x61,0x38] + amadd.w $a4, $t0, $r21 +@@ -113,6 +145,14 @@ ammin.wu $a4, $t6, $s7 + # CHECK64-ASM: encoding: [0x27,0xc3,0x68,0x38] + ammin.du $a3, $t4, $s2 + ++# CHECK64-ASM-AND-OBJ: amswap_db.b $a2, $t0, $s1 ++# CHECK64-ASM: encoding: [0x06,0x33,0x5e,0x38] ++amswap_db.b $a2, $t0, $s1 ++ ++# CHECK64-ASM-AND-OBJ: amswap_db.h $tp, $t2, $fp ++# CHECK64-ASM: encoding: [0xc2,0xba,0x5e,0x38] ++amswap_db.h $tp, $t2, $fp ++ + # CHECK64-ASM-AND-OBJ: amswap_db.w $a2, $t0, $s1 + # CHECK64-ASM: encoding: [0x06,0x33,0x69,0x38] + amswap_db.w $a2, $t0, $s1 +@@ -121,6 +161,14 
@@ amswap_db.w $a2, $t0, $s1 + # CHECK64-ASM: encoding: [0xc2,0xba,0x69,0x38] + amswap_db.d $tp, $t2, $fp + ++# CHECK64-ASM-AND-OBJ: amadd_db.b $zero, $zero, $a1 ++# CHECK64-ASM: encoding: [0xa0,0x00,0x5f,0x38] ++amadd_db.b $zero, $zero, $a1 ++ ++# CHECK64-ASM-AND-OBJ: amadd_db.h $a4, $t0, $r21 ++# CHECK64-ASM: encoding: [0xa8,0xb2,0x5f,0x38] ++amadd_db.h $a4, $t0, $r21 ++ + # CHECK64-ASM-AND-OBJ: amadd_db.w $a4, $t0, $r21 + # CHECK64-ASM: encoding: [0xa8,0x32,0x6a,0x38] + amadd_db.w $a4, $t0, $r21 +@@ -185,6 +233,38 @@ ammin_db.wu $a4, $t6, $s7 + # CHECK64-ASM: encoding: [0x27,0xc3,0x71,0x38] + ammin_db.du $a3, $t4, $s2 + ++# CHECK64-ASM-AND-OBJ: amcas.b $t1, $t2, $t3 ++# CHECK64-ASM: encoding: [0xed,0x39,0x58,0x38] ++amcas.b $t1, $t2, $t3 ++ ++# CHECK64-ASM-AND-OBJ: amcas.h $t1, $t2, $t3 ++# CHECK64-ASM: encoding: [0xed,0xb9,0x58,0x38] ++amcas.h $t1, $t2, $t3 ++ ++# CHECK64-ASM-AND-OBJ: amcas.w $t1, $t2, $t3 ++# CHECK64-ASM: encoding: [0xed,0x39,0x59,0x38] ++amcas.w $t1, $t2, $t3 ++ ++# CHECK64-ASM-AND-OBJ: amcas.d $t1, $t2, $t3 ++# CHECK64-ASM: encoding: [0xed,0xb9,0x59,0x38] ++amcas.d $t1, $t2, $t3 ++ ++# CHECK64-ASM-AND-OBJ: amcas_db.b $t1, $t2, $t3 ++# CHECK64-ASM: encoding: [0xed,0x39,0x5a,0x38] ++amcas_db.b $t1, $t2, $t3 ++ ++# CHECK64-ASM-AND-OBJ: amcas_db.h $t1, $t2, $t3 ++# CHECK64-ASM: encoding: [0xed,0xb9,0x5a,0x38] ++amcas_db.h $t1, $t2, $t3 ++ ++# CHECK64-ASM-AND-OBJ: amcas_db.w $t1, $t2, $t3 ++# CHECK64-ASM: encoding: [0xed,0x39,0x5b,0x38] ++amcas_db.w $t1, $t2, $t3 ++ ++# CHECK64-ASM-AND-OBJ: amcas_db.d $t1, $t2, $t3 ++# CHECK64-ASM: encoding: [0xed,0xb9,0x5b,0x38] ++amcas_db.d $t1, $t2, $t3 ++ + # CHECK64-ASM-AND-OBJ: ll.d $s2, $s4, 16 + # CHECK64-ASM: encoding: [0x79,0x13,0x00,0x22] + ll.d $s2, $s4, 16 +@@ -193,5 +273,17 @@ ll.d $s2, $s4, 16 + # CHECK64-ASM: encoding: [0x31,0xf6,0x00,0x23] + sc.d $t5, $t5, 244 + ++# CHECK64-ASM-AND-OBJ: sc.q $t7, $t2, $t5 ++# CHECK64-ASM: encoding: [0x33,0x3a,0x57,0x38] ++sc.q $t7, $t2, $t5 ++ ++# CHECK64-ASM-AND-OBJ: llacq.d $t1, $t2 ++# CHECK64-ASM: encoding: [0xcd,0x89,0x57,0x38] ++llacq.d $t1, $t2 ++ ++# CHECK64-ASM-AND-OBJ: screl.d $t1, $t2 ++# CHECK64-ASM: encoding: [0xcd,0x8d,0x57,0x38] ++screl.d $t1, $t2 ++ + .endif + +diff --git a/llvm/test/MC/LoongArch/lasx/frecip.s b/llvm/test/MC/LoongArch/lasx/frecip.s +index 1bb3ce02fb9c..e95b03a96eba 100644 +--- a/llvm/test/MC/LoongArch/lasx/frecip.s ++++ b/llvm/test/MC/LoongArch/lasx/frecip.s +@@ -10,3 +10,11 @@ xvfrecip.s $xr3, $xr16 + xvfrecip.d $xr17, $xr24 + # CHECK-INST: xvfrecip.d $xr17, $xr24 + # CHECK-ENCODING: encoding: [0x11,0xfb,0x9c,0x76] ++ ++xvfrecipe.s $xr3, $xr16 ++# CHECK-INST: xvfrecipe.s $xr3, $xr16 ++# CHECK-ENCODING: encoding: [0x03,0x16,0x9d,0x76] ++ ++xvfrecipe.d $xr17, $xr24 ++# CHECK-INST: xvfrecipe.d $xr17, $xr24 ++# CHECK-ENCODING: encoding: [0x11,0x1b,0x9d,0x76] +diff --git a/llvm/test/MC/LoongArch/lasx/frsqrt.s b/llvm/test/MC/LoongArch/lasx/frsqrt.s +index af96e10832df..d1048f9ff8f0 100644 +--- a/llvm/test/MC/LoongArch/lasx/frsqrt.s ++++ b/llvm/test/MC/LoongArch/lasx/frsqrt.s +@@ -10,3 +10,11 @@ xvfrsqrt.s $xr31, $xr25 + xvfrsqrt.d $xr14, $xr22 + # CHECK-INST: xvfrsqrt.d $xr14, $xr22 + # CHECK-ENCODING: encoding: [0xce,0x0a,0x9d,0x76] ++ ++xvfrsqrte.s $xr31, $xr25 ++# CHECK-INST: xvfrsqrte.s $xr31, $xr25 ++# CHECK-ENCODING: encoding: [0x3f,0x27,0x9d,0x76] ++ ++xvfrsqrte.d $xr14, $xr22 ++# CHECK-INST: xvfrsqrte.d $xr14, $xr22 ++# CHECK-ENCODING: encoding: [0xce,0x2a,0x9d,0x76] +diff --git a/llvm/test/MC/LoongArch/lsx/frecip.s 
b/llvm/test/MC/LoongArch/lsx/frecip.s +index d8c8278d1667..cd6d925e1470 100644 +--- a/llvm/test/MC/LoongArch/lsx/frecip.s ++++ b/llvm/test/MC/LoongArch/lsx/frecip.s +@@ -10,3 +10,11 @@ vfrecip.s $vr29, $vr14 + vfrecip.d $vr24, $vr9 + # CHECK-INST: vfrecip.d $vr24, $vr9 + # CHECK-ENCODING: encoding: [0x38,0xf9,0x9c,0x72] ++ ++vfrecipe.s $vr29, $vr14 ++# CHECK-INST: vfrecipe.s $vr29, $vr14 ++# CHECK-ENCODING: encoding: [0xdd,0x15,0x9d,0x72] ++ ++vfrecipe.d $vr24, $vr9 ++# CHECK-INST: vfrecipe.d $vr24, $vr9 ++# CHECK-ENCODING: encoding: [0x38,0x19,0x9d,0x72] +diff --git a/llvm/test/MC/LoongArch/lsx/frsqrt.s b/llvm/test/MC/LoongArch/lsx/frsqrt.s +index 68b0cc091b8a..d8b9fc3d0684 100644 +--- a/llvm/test/MC/LoongArch/lsx/frsqrt.s ++++ b/llvm/test/MC/LoongArch/lsx/frsqrt.s +@@ -10,3 +10,11 @@ vfrsqrt.s $vr19, $vr30 + vfrsqrt.d $vr1, $vr0 + # CHECK-INST: vfrsqrt.d $vr1, $vr0 + # CHECK-ENCODING: encoding: [0x01,0x08,0x9d,0x72] ++ ++vfrsqrte.s $vr19, $vr30 ++# CHECK-INST: vfrsqrte.s $vr19, $vr30 ++# CHECK-ENCODING: encoding: [0xd3,0x27,0x9d,0x72] ++ ++vfrsqrte.d $vr1, $vr0 ++# CHECK-INST: vfrsqrte.d $vr1, $vr0 ++# CHECK-ENCODING: encoding: [0x01,0x28,0x9d,0x72] +-- +2.20.1 + diff --git a/0018-LoongArch-CodeGen-Add-LASX-builtin-testcases.patch b/0018-LoongArch-CodeGen-Add-LASX-builtin-testcases.patch new file mode 100644 index 0000000..bec6a95 --- /dev/null +++ b/0018-LoongArch-CodeGen-Add-LASX-builtin-testcases.patch @@ -0,0 +1,11692 @@ +From 6261519a92fc5ab54c5f12f940c33128f981dad9 Mon Sep 17 00:00:00 2001 +From: chenli +Date: Fri, 27 Oct 2023 15:58:55 +0800 +Subject: [PATCH 18/42] [LoongArch][CodeGen] Add LASX builtin testcases + +(cherry picked from commit 535408eedbf812d9038bd40a0faae5001d2256cf) + +--- + .../LoongArch/lasx/builtin-alias-error.c | 1373 +++++ + .../CodeGen/LoongArch/lasx/builtin-alias.c | 4430 ++++++++++++++++ + .../CodeGen/LoongArch/lasx/builtin-error.c | 1392 ++++++ + clang/test/CodeGen/LoongArch/lasx/builtin.c | 4452 +++++++++++++++++ + 4 files changed, 11647 insertions(+) + create mode 100644 clang/test/CodeGen/LoongArch/lasx/builtin-alias-error.c + create mode 100644 clang/test/CodeGen/LoongArch/lasx/builtin-alias.c + create mode 100644 clang/test/CodeGen/LoongArch/lasx/builtin-error.c + create mode 100644 clang/test/CodeGen/LoongArch/lasx/builtin.c + +diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-alias-error.c b/clang/test/CodeGen/LoongArch/lasx/builtin-alias-error.c +new file mode 100644 +index 000000000000..2a3862bbe3c1 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/lasx/builtin-alias-error.c +@@ -0,0 +1,1373 @@ ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -verify %s ++ ++#include ++ ++v32i8 xvslli_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvslli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvslli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvslli_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvslli_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvslli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvslli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvslli_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvslli_w(v8i32 _1, int var) { ++ 
v8i32 res = __lasx_xvslli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslli_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvslli_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvslli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvslli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvslli_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrai_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvsrai_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvsrai_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvsrai_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrai_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvsrai_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsrai_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsrai_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrai_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvsrai_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsrai_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsrai_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrai_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvsrai_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsrai_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsrai_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrari_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvsrari_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvsrari_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvsrari_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrari_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvsrari_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsrari_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsrari_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrari_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvsrari_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsrari_w(_1, 32); // expected-error {{argument value 32 is outside the valid 
range [0, 31]}} ++ res |= __lasx_xvsrari_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrari_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvsrari_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsrari_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsrari_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrli_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvsrli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvsrli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvsrli_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrli_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvsrli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsrli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsrli_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrli_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvsrli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsrli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsrli_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrli_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvsrli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsrli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsrli_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrlri_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvsrlri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvsrlri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvsrlri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrlri_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvsrlri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsrlri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsrlri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrlri_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvsrlri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsrlri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsrlri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrlri_d(v4i64 _1, int var) { ++ 
v4i64 res = __lasx_xvsrlri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsrlri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsrlri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvbitclri_b(v32u8 _1, int var) { ++ v32u8 res = __lasx_xvbitclri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvbitclri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvbitclri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_b' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvbitclri_h(v16u16 _1, int var) { ++ v16u16 res = __lasx_xvbitclri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvbitclri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvbitclri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_h' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvbitclri_w(v8u32 _1, int var) { ++ v8u32 res = __lasx_xvbitclri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvbitclri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvbitclri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_w' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvbitclri_d(v4u64 _1, int var) { ++ v4u64 res = __lasx_xvbitclri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvbitclri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvbitclri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvbitseti_b(v32u8 _1, int var) { ++ v32u8 res = __lasx_xvbitseti_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvbitseti_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvbitseti_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_b' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvbitseti_h(v16u16 _1, int var) { ++ v16u16 res = __lasx_xvbitseti_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvbitseti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvbitseti_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_h' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvbitseti_w(v8u32 _1, int var) { ++ v8u32 res = __lasx_xvbitseti_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvbitseti_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvbitseti_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_w' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvbitseti_d(v4u64 _1, int var) { ++ v4u64 res = __lasx_xvbitseti_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 
63]}} ++ res |= __lasx_xvbitseti_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvbitseti_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvbitrevi_b(v32u8 _1, int var) { ++ v32u8 res = __lasx_xvbitrevi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvbitrevi_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvbitrevi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_b' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvbitrevi_h(v16u16 _1, int var) { ++ v16u16 res = __lasx_xvbitrevi_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvbitrevi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvbitrevi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_h' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvbitrevi_w(v8u32 _1, int var) { ++ v8u32 res = __lasx_xvbitrevi_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvbitrevi_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvbitrevi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_w' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvbitrevi_d(v4u64 _1, int var) { ++ v4u64 res = __lasx_xvbitrevi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvbitrevi_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvbitrevi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvaddi_bu(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvaddi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvaddi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvaddi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_bu' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvaddi_hu(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvaddi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvaddi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvaddi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_hu' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvaddi_wu(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvaddi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvaddi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvaddi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_wu' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvaddi_du(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvaddi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvaddi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= 
__lasx_xvaddi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsubi_bu(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvsubi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsubi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsubi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_bu' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsubi_hu(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvsubi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsubi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsubi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_hu' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsubi_wu(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvsubi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsubi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsubi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_wu' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsubi_du(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvsubi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsubi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsubi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvmaxi_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvmaxi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvmaxi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvmaxi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvmaxi_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvmaxi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvmaxi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvmaxi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvmaxi_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvmaxi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvmaxi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvmaxi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvmaxi_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvmaxi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvmaxi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvmaxi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvmaxi_bu(v32u8 _1, int var) { ++ v32u8 res = 
__lasx_xvmaxi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvmaxi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvmaxi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_bu' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvmaxi_hu(v16u16 _1, int var) { ++ v16u16 res = __lasx_xvmaxi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvmaxi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvmaxi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_hu' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvmaxi_wu(v8u32 _1, int var) { ++ v8u32 res = __lasx_xvmaxi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvmaxi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvmaxi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_wu' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvmaxi_du(v4u64 _1, int var) { ++ v4u64 res = __lasx_xvmaxi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvmaxi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvmaxi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvmini_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvmini_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvmini_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvmini_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvmini_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvmini_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvmini_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvmini_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvmini_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvmini_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvmini_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvmini_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvmini_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvmini_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvmini_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvmini_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvmini_bu(v32u8 _1, int var) { ++ v32u8 res = __lasx_xvmini_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvmini_bu(_1, 32); // expected-error {{argument value 32 is outside the valid
range [0, 31]}} ++ res |= __lasx_xvmini_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_bu' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvmini_hu(v16u16 _1, int var) { ++ v16u16 res = __lasx_xvmini_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvmini_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvmini_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_hu' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvmini_wu(v8u32 _1, int var) { ++ v8u32 res = __lasx_xvmini_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvmini_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvmini_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_wu' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvmini_du(v4u64 _1, int var) { ++ v4u64 res = __lasx_xvmini_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvmini_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvmini_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvseqi_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvseqi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvseqi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvseqi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvseqi_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvseqi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvseqi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvseqi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvseqi_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvseqi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvseqi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvseqi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvseqi_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvseqi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvseqi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvseqi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvslti_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvslti_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvslti_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvslti_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvslti_h(v16i16 _1, int var) { ++ v16i16 
res = __lasx_xvslti_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvslti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvslti_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvslti_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvslti_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvslti_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvslti_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvslti_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvslti_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvslti_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvslti_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvslti_bu(v32u8 _1, int var) { ++ v32i8 res = __lasx_xvslti_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslti_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslti_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_bu' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvslti_hu(v16u16 _1, int var) { ++ v16i16 res = __lasx_xvslti_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslti_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslti_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_hu' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvslti_wu(v8u32 _1, int var) { ++ v8i32 res = __lasx_xvslti_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslti_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslti_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_wu' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvslti_du(v4u64 _1, int var) { ++ v4i64 res = __lasx_xvslti_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslti_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslti_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvslei_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvslei_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvslei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvslei_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvslei_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvslei_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvslei_h(_1, 16); // expected-error {{argument value 16 is outside the valid 
range [-16, 15]}} ++ res |= __lasx_xvslei_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvslei_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvslei_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvslei_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvslei_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvslei_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvslei_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvslei_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvslei_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvslei_bu(v32u8 _1, int var) { ++ v32i8 res = __lasx_xvslei_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslei_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslei_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_bu' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvslei_hu(v16u16 _1, int var) { ++ v16i16 res = __lasx_xvslei_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslei_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslei_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_hu' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvslei_wu(v8u32 _1, int var) { ++ v8i32 res = __lasx_xvslei_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslei_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslei_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_wu' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvslei_du(v4u64 _1, int var) { ++ v4i64 res = __lasx_xvslei_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslei_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslei_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsat_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvsat_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvsat_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvsat_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsat_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvsat_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsat_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsat_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsat_w(v8i32 _1, int var) { ++ v8i32 res = 
__lasx_xvsat_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsat_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsat_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsat_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvsat_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsat_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsat_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvsat_bu(v32u8 _1, int var) { ++ v32u8 res = __lasx_xvsat_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvsat_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvsat_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_bu' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvsat_hu(v16u16 _1, int var) { ++ v16u16 res = __lasx_xvsat_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsat_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsat_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_hu' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvsat_wu(v8u32 _1, int var) { ++ v8u32 res = __lasx_xvsat_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsat_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsat_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_wu' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvsat_du(v4u64 _1, int var) { ++ v4u64 res = __lasx_xvsat_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsat_du(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsat_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvrepl128vei_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvrepl128vei_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvrepl128vei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvrepl128vei_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvrepl128vei_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvrepl128vei_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvrepl128vei_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvrepl128vei_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvrepl128vei_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvrepl128vei_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __lasx_xvrepl128vei_w(_1, 4); // expected-error 
{{argument value 4 is outside the valid range [0, 3]}} ++ res |= __lasx_xvrepl128vei_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvrepl128vei_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvrepl128vei_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} ++ res |= __lasx_xvrepl128vei_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ res |= __lasx_xvrepl128vei_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvandi_b(v32u8 _1, int var) { ++ v32u8 res = __lasx_xvandi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lasx_xvandi_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lasx_xvandi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvandi_b' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvori_b(v32u8 _1, int var) { ++ v32u8 res = __lasx_xvori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lasx_xvori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lasx_xvori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvori_b' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvnori_b(v32u8 _1, int var) { ++ v32u8 res = __lasx_xvnori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lasx_xvnori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lasx_xvnori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvnori_b' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvxori_b(v32u8 _1, int var) { ++ v32u8 res = __lasx_xvxori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lasx_xvxori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lasx_xvxori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvxori_b' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvbitseli_b(v32u8 _1, v32u8 _2, int var) { ++ v32u8 res = __lasx_xvbitseli_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lasx_xvbitseli_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lasx_xvbitseli_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvbitseli_b' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvshuf4i_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvshuf4i_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lasx_xvshuf4i_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lasx_xvshuf4i_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvshuf4i_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvshuf4i_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lasx_xvshuf4i_h(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lasx_xvshuf4i_h(_1, var); // 
expected-error {{argument to '__builtin_lasx_xvshuf4i_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvshuf4i_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvshuf4i_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lasx_xvshuf4i_w(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lasx_xvshuf4i_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __lasx_xvshuf4i_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lasx_xvshuf4i_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lasx_xvshuf4i_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_d' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvpermi_w(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __lasx_xvpermi_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lasx_xvpermi_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lasx_xvpermi_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvpermi_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvpermi_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvpermi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lasx_xvpermi_d(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lasx_xvpermi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpermi_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvpermi_q(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __lasx_xvpermi_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lasx_xvpermi_q(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lasx_xvpermi_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvpermi_q' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsllwil_h_b(v32i8 _1, int var) { ++ v16i16 res = __lasx_xvsllwil_h_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvsllwil_h_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvsllwil_h_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_h_b' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsllwil_w_h(v16i16 _1, int var) { ++ v8i32 res = __lasx_xvsllwil_w_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsllwil_w_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsllwil_w_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_w_h' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsllwil_d_w(v8i32 _1, int var) { ++ v4i64 res = __lasx_xvsllwil_d_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsllwil_d_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsllwil_d_w(_1, var); // expected-error {{argument to 
'__builtin_lasx_xvsllwil_d_w' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvsllwil_hu_bu(v32u8 _1, int var) { ++ v16u16 res = __lasx_xvsllwil_hu_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvsllwil_hu_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvsllwil_hu_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_hu_bu' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvsllwil_wu_hu(v16u16 _1, int var) { ++ v8u32 res = __lasx_xvsllwil_wu_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsllwil_wu_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsllwil_wu_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_wu_hu' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvsllwil_du_wu(v8u32 _1, int var) { ++ v4u64 res = __lasx_xvsllwil_du_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsllwil_du_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsllwil_du_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_du_wu' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __lasx_xvfrstpi_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvfrstpi_b(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvfrstpi_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvfrstpi_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __lasx_xvfrstpi_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvfrstpi_h(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvfrstpi_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvfrstpi_h' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvbsrl_v(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvbsrl_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvbsrl_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvbsrl_v(_1, var); // expected-error {{argument to '__builtin_lasx_xvbsrl_v' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvbsll_v(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvbsll_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvbsll_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvbsll_v(_1, var); // expected-error {{argument to '__builtin_lasx_xvbsll_v' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvextrins_b(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __lasx_xvextrins_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lasx_xvextrins_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lasx_xvextrins_b(_1, _2, var); // expected-error {{argument to 
'__builtin_lasx_xvextrins_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvextrins_h(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __lasx_xvextrins_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lasx_xvextrins_h(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lasx_xvextrins_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvextrins_w(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __lasx_xvextrins_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lasx_xvextrins_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lasx_xvextrins_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvextrins_d(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __lasx_xvextrins_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lasx_xvextrins_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lasx_xvextrins_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvld(void *_1, int var) { ++ v32i8 res = __lasx_xvld(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ res |= __lasx_xvld(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} ++ res |= __lasx_xvld(_1, var); // expected-error {{argument to '__builtin_lasx_xvld' must be a constant integer}} ++ return res; ++} ++ ++void xvst(v32i8 _1, void *_2, int var) { ++ __lasx_xvst(_1, _2, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ __lasx_xvst(_1, _2, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} ++ __lasx_xvst(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvst' must be a constant integer}} ++} ++ ++void xvstelm_b(v32i8 _1, void * _2, int var) { ++ __lasx_xvstelm_b(_1, _2, -129, 1); // expected-error {{argument value -129 is outside the valid range [-128, 127]}} ++ __lasx_xvstelm_b(_1, _2, 128, 1); // expected-error {{argument value 128 is outside the valid range [-128, 127]}} ++ __lasx_xvstelm_b(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_b' must be a constant integer}} ++} ++ ++void xvstelm_h(v16i16 _1, void * _2, int var) { ++ __lasx_xvstelm_h(_1, _2, -258, 1); // expected-error {{argument value -258 is outside the valid range [-256, 254]}} ++ __lasx_xvstelm_h(_1, _2, 256, 1); // expected-error {{argument value 256 is outside the valid range [-256, 254]}} ++ __lasx_xvstelm_h(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_h' must be a constant integer}} ++} ++ ++void xvstelm_w(v8i32 _1, void * _2, int var) { ++ __lasx_xvstelm_w(_1, _2, -516, 1); // expected-error {{argument value -516 is outside the valid range [-512, 508]}} ++ __lasx_xvstelm_w(_1, _2, 512, 1); // expected-error {{argument value 512 is outside the valid range [-512, 508]}} ++ __lasx_xvstelm_w(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_w' must be a constant integer}} ++} ++ ++void xvstelm_d(v4i64 _1, void * _2, int var) 
{ ++ __lasx_xvstelm_d(_1, _2, -1032, 1); // expected-error {{argument value -1032 is outside the valid range [-1024, 1016]}} ++ __lasx_xvstelm_d(_1, _2, 1024, 1); // expected-error {{argument value 1024 is outside the valid range [-1024, 1016]}} ++ __lasx_xvstelm_d(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_d' must be a constant integer}} ++} ++ ++void xvstelm_b_idx(v32i8 _1, void * _2, int var) { ++ __lasx_xvstelm_b(_1, _2, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ __lasx_xvstelm_b(_1, _2, 1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ __lasx_xvstelm_b(_1, _2, 1, var); // expected-error {{argument to '__builtin_lasx_xvstelm_b' must be a constant integer}} ++} ++ ++void xvstelm_h_idx(v16i16 _1, void * _2, int var) { ++ __lasx_xvstelm_h(_1, _2, 2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ __lasx_xvstelm_h(_1, _2, 2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ __lasx_xvstelm_h(_1, _2, 2, var); // expected-error {{argument to '__builtin_lasx_xvstelm_h' must be a constant integer}} ++} ++ ++void xvstelm_w_idx(v8i32 _1, void * _2, int var) { ++ __lasx_xvstelm_w(_1, _2, 4, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ __lasx_xvstelm_w(_1, _2, 4, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ __lasx_xvstelm_w(_1, _2, 4, var); // expected-error {{argument to '__builtin_lasx_xvstelm_w' must be a constant integer}} ++} ++ ++void xvstelm_d_idx(v4i64 _1, void * _2, int var) { ++ __lasx_xvstelm_d(_1, _2, 8, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ __lasx_xvstelm_d(_1, _2, 8, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ __lasx_xvstelm_d(_1, _2, 8, var); // expected-error {{argument to '__builtin_lasx_xvstelm_d' must be a constant integer}} ++} ++ ++v8i32 xvinsve0_w(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __lasx_xvinsve0_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvinsve0_w(_1, _2, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvinsve0_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvinsve0_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvinsve0_d(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __lasx_xvinsve0_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __lasx_xvinsve0_d(_1, _2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __lasx_xvinsve0_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvinsve0_d' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvpickve_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvpickve_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvpickve_w(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvpickve_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvpickve_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvpickve_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __lasx_xvpickve_d(_1, 4); // 
expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __lasx_xvpickve_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvldi(int var) { ++ v4i64 res = __lasx_xvldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}} ++ res |= __lasx_xvldi(4096); // expected-error {{argument value 4096 is outside the valid range [-4096, 4095]}} ++ res |= __lasx_xvldi(var); // expected-error {{argument to '__builtin_lasx_xvldi' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvinsgr2vr_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvinsgr2vr_w(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvinsgr2vr_w(_1, 1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvinsgr2vr_w(_1, 1, var); // expected-error {{argument to '__builtin_lasx_xvinsgr2vr_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvinsgr2vr_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvinsgr2vr_d(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __lasx_xvinsgr2vr_d(_1, 1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __lasx_xvinsgr2vr_d(_1, 1, var); // expected-error {{argument to '__builtin_lasx_xvinsgr2vr_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvldrepl_b(void *_1, int var) { ++ v32i8 res = __lasx_xvldrepl_b(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ res |= __lasx_xvldrepl_b(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} ++ res |= __lasx_xvldrepl_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvldrepl_h(void *_1, int var) { ++ v16i16 res = __lasx_xvldrepl_h(_1, -2050); // expected-error {{argument value -2050 is outside the valid range [-2048, 2046]}} ++ res |= __lasx_xvldrepl_h(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2046]}} ++ res |= __lasx_xvldrepl_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvldrepl_w(void *_1, int var) { ++ v8i32 res = __lasx_xvldrepl_w(_1, -2052); // expected-error {{argument value -2052 is outside the valid range [-2048, 2044]}} ++ res |= __lasx_xvldrepl_w(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2044]}} ++ res |= __lasx_xvldrepl_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvldrepl_d(void *_1, int var) { ++ v4i64 res = __lasx_xvldrepl_d(_1, -2056); // expected-error {{argument value -2056 is outside the valid range [-2048, 2040]}} ++ res |= __lasx_xvldrepl_d(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2040]}} ++ res |= __lasx_xvldrepl_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_d' must be a constant integer}} ++ return res; ++} ++ ++int xvpickve2gr_w(v8i32 _1, int var) { ++ int res = __lasx_xvpickve2gr_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvpickve2gr_w(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= 
__lasx_xvpickve2gr_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_w' must be a constant integer}} ++ return res; ++} ++ ++unsigned int xvpickve2gr_wu(v8i32 _1, int var) { ++ unsigned int res = __lasx_xvpickve2gr_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvpickve2gr_wu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvpickve2gr_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_wu' must be a constant integer}} ++ return res; ++} ++ ++long xvpickve2gr_d(v4i64 _1, int var) { ++ long res = __lasx_xvpickve2gr_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __lasx_xvpickve2gr_d(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __lasx_xvpickve2gr_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_d' must be a constant integer}} ++ return res; ++} ++ ++unsigned long int xvpickve2gr_du(v4i64 _1, int var) { ++ unsigned long int res = __lasx_xvpickve2gr_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __lasx_xvpickve2gr_du(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __lasx_xvpickve2gr_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvrotri_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvrotri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvrotri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvrotri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvrotri_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvrotri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvrotri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvrotri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvrotri_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvrotri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvrotri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvrotri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvrotri_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvrotri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvrotri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvrotri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __lasx_xvsrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsrlni_b_h(_1, _2, var); // expected-error {{argument 
to '__builtin_lasx_xvsrlni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __lasx_xvsrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __lasx_xvsrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __lasx_xvsrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lasx_xvsrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lasx_xvsrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __lasx_xvsrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __lasx_xvsrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __lasx_xvsrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __lasx_xvsrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lasx_xvsrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lasx_xvsrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __lasx_xvssrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvssrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 
is outside the valid range [0, 15]}} ++ res |= __lasx_xvssrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __lasx_xvssrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvssrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvssrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __lasx_xvssrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvssrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvssrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __lasx_xvssrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lasx_xvssrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lasx_xvssrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2, int var) { ++ v32u8 res = __lasx_xvssrlni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvssrlni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvssrlni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2, int var) { ++ v16u16 res = __lasx_xvssrlni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvssrlni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvssrlni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2, int var) { ++ v8u32 res = __lasx_xvssrlni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvssrlni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvssrlni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2, int var) { ++ v4u64 res = __lasx_xvssrlni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lasx_xvssrlni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lasx_xvssrlni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_du_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __lasx_xvssrlrni_b_h(_1, _2, -1); // 
expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvssrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvssrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __lasx_xvssrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvssrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvssrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __lasx_xvssrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvssrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvssrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __lasx_xvssrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lasx_xvssrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lasx_xvssrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2, int var) { ++ v32u8 res = __lasx_xvssrlrni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvssrlrni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvssrlrni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2, int var) { ++ v16u16 res = __lasx_xvssrlrni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvssrlrni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvssrlrni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2, int var) { ++ v8u32 res = __lasx_xvssrlrni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvssrlrni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvssrlrni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2, int var) { ++ v4u64 res = __lasx_xvssrlrni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lasx_xvssrlrni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lasx_xvssrlrni_du_q(_1, _2, var); // expected-error {{argument to 
'__builtin_lasx_xvssrlrni_du_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __lasx_xvsrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __lasx_xvsrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __lasx_xvsrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __lasx_xvsrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lasx_xvsrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lasx_xvsrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __lasx_xvsrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __lasx_xvsrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __lasx_xvsrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __lasx_xvsrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lasx_xvsrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is 
outside the valid range [0, 127]}} ++ res |= __lasx_xvsrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __lasx_xvssrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvssrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvssrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __lasx_xvssrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvssrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvssrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __lasx_xvssrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvssrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvssrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __lasx_xvssrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lasx_xvssrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lasx_xvssrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2, int var) { ++ v32u8 res = __lasx_xvssrani_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvssrani_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvssrani_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2, int var) { ++ v16u16 res = __lasx_xvssrani_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvssrani_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvssrani_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2, int var) { ++ v8u32 res = __lasx_xvssrani_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvssrani_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvssrani_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2, int var) { ++ v4u64 res = __lasx_xvssrani_du_q(_1, _2, -1); // expected-error 
{{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lasx_xvssrani_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lasx_xvssrani_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_du_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __lasx_xvssrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvssrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvssrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __lasx_xvssrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvssrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvssrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __lasx_xvssrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvssrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvssrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __lasx_xvssrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lasx_xvssrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lasx_xvssrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2, int var) { ++ v32u8 res = __lasx_xvssrarni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvssrarni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvssrarni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2, int var) { ++ v16u16 res = __lasx_xvssrarni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvssrarni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvssrarni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2, int var) { ++ v8u32 res = __lasx_xvssrarni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvssrarni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvssrarni_wu_d(_1, _2, var); // expected-error {{argument to 
'__builtin_lasx_xvssrarni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2, int var) { ++ v4u64 res = __lasx_xvssrarni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lasx_xvssrarni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lasx_xvssrarni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_du_q' must be a constant integer}} ++ return res; ++} ++ ++v4f64 xvpickve_d_f(v4f64 _1, int var) { ++ v4f64 res = __lasx_xvpickve_d_f(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res += __lasx_xvpickve_d_f(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res += __lasx_xvpickve_d_f(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_d_f' must be a constant integer}} ++ return res; ++} ++ ++v8f32 xvpickve_w_f(v8f32 _1, int var) { ++ v8f32 res = __lasx_xvpickve_w_f(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res += __lasx_xvpickve_w_f(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res += __lasx_xvpickve_w_f(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_w_f' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvrepli_b(int var) { ++ v32i8 res = __lasx_xvrepli_b(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __lasx_xvrepli_b(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __lasx_xvrepli_b(var); // expected-error {{argument to '__builtin_lasx_xvrepli_b' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvrepli_d(int var) { ++ v4i64 res = __lasx_xvrepli_d(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __lasx_xvrepli_d(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __lasx_xvrepli_d(var); // expected-error {{argument to '__builtin_lasx_xvrepli_d' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvrepli_h(int var) { ++ v16i16 res = __lasx_xvrepli_h(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __lasx_xvrepli_h(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __lasx_xvrepli_h(var); // expected-error {{argument to '__builtin_lasx_xvrepli_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvrepli_w(int var) { ++ v8i32 res = __lasx_xvrepli_w(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __lasx_xvrepli_w(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __lasx_xvrepli_w(var); // expected-error {{argument to '__builtin_lasx_xvrepli_w' must be a constant integer}} ++ return res; ++} +diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c b/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c +new file mode 100644 +index 000000000000..09b2d5fcacf5 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c +@@ -0,0 +1,4430 @@ ++// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -O2 -emit-llvm %s -o - | FileCheck %s ++ ++#include <lasxintrin.h> ++ ++// CHECK-LABEL: @xvsll_b( ++// CHECK-NEXT: entry:
++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsll_b(v32i8 _1, v32i8 _2) { return __lasx_xvsll_b(_1, _2); } ++// CHECK-LABEL: @xvsll_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsll_h(v16i16 _1, v16i16 _2) { return __lasx_xvsll_h(_1, _2); } ++// CHECK-LABEL: @xvsll_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsll_w(v8i32 _1, v8i32 _2) { return __lasx_xvsll_w(_1, _2); } ++// CHECK-LABEL: @xvsll_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsll_d(v4i64 _1, v4i64 _2) { return __lasx_xvsll_d(_1, _2); } ++// CHECK-LABEL: @xvslli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvslli_b(v32i8 _1) { return __lasx_xvslli_b(_1, 1); } ++// CHECK-LABEL: @xvslli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvslli_h(v16i16 _1) { return __lasx_xvslli_h(_1, 1); } ++// CHECK-LABEL: @xvslli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvslli_w(v8i32 _1) { return __lasx_xvslli_w(_1, 1); } ++// CHECK-LABEL: @xvslli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvslli_d(v4i64 _1) { return __lasx_xvslli_d(_1, 1); } ++// CHECK-LABEL: @xvsra_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsra_b(v32i8 _1, v32i8 _2) { return __lasx_xvsra_b(_1, _2); } ++// CHECK-LABEL: @xvsra_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsra_h(v16i16 _1, v16i16 _2) { return __lasx_xvsra_h(_1, _2); } ++// CHECK-LABEL: @xvsra_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsra_w(v8i32 _1, v8i32 _2) { return __lasx_xvsra_w(_1, _2); } ++// CHECK-LABEL: @xvsra_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsra_d(v4i64 _1, v4i64 _2) { return __lasx_xvsra_d(_1, _2); } ++// CHECK-LABEL: @xvsrai_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1:%.*]], 
i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrai_b(v32i8 _1) { return __lasx_xvsrai_b(_1, 1); } ++// CHECK-LABEL: @xvsrai_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrai_h(v16i16 _1) { return __lasx_xvsrai_h(_1, 1); } ++// CHECK-LABEL: @xvsrai_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrai_w(v8i32 _1) { return __lasx_xvsrai_w(_1, 1); } ++// CHECK-LABEL: @xvsrai_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrai_d(v4i64 _1) { return __lasx_xvsrai_d(_1, 1); } ++// CHECK-LABEL: @xvsrar_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrar_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrar_b(_1, _2); } ++// CHECK-LABEL: @xvsrar_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrar_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrar_h(_1, _2); } ++// CHECK-LABEL: @xvsrar_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrar_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrar_w(_1, _2); } ++// CHECK-LABEL: @xvsrar_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrar_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrar_d(_1, _2); } ++// CHECK-LABEL: @xvsrari_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrari_b(v32i8 _1) { return __lasx_xvsrari_b(_1, 1); } ++// CHECK-LABEL: @xvsrari_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrari_h(v16i16 _1) { return __lasx_xvsrari_h(_1, 1); } ++// CHECK-LABEL: @xvsrari_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrari_w(v8i32 _1) { return __lasx_xvsrari_w(_1, 1); } ++// CHECK-LABEL: @xvsrari_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrari_d(v4i64 _1) { return __lasx_xvsrari_d(_1, 1); } ++// CHECK-LABEL: @xvsrl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrl_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrl_b(_1, _2); } ++// CHECK-LABEL: @xvsrl_h( 
++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrl_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrl_h(_1, _2); } ++// CHECK-LABEL: @xvsrl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrl_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrl_w(_1, _2); } ++// CHECK-LABEL: @xvsrl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrl_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrl_d(_1, _2); } ++// CHECK-LABEL: @xvsrli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrli_b(v32i8 _1) { return __lasx_xvsrli_b(_1, 1); } ++// CHECK-LABEL: @xvsrli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrli_h(v16i16 _1) { return __lasx_xvsrli_h(_1, 1); } ++// CHECK-LABEL: @xvsrli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrli_w(v8i32 _1) { return __lasx_xvsrli_w(_1, 1); } ++// CHECK-LABEL: @xvsrli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrli_d(v4i64 _1) { return __lasx_xvsrli_d(_1, 1); } ++// CHECK-LABEL: @xvsrlr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrlr_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrlr_b(_1, _2); } ++// CHECK-LABEL: @xvsrlr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrlr_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrlr_h(_1, _2); } ++// CHECK-LABEL: @xvsrlr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrlr_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrlr_w(_1, _2); } ++// CHECK-LABEL: @xvsrlr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrlr_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrlr_d(_1, _2); } ++// CHECK-LABEL: @xvsrlri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrlri_b(v32i8 _1) { return __lasx_xvsrlri_b(_1, 1); } ++// CHECK-LABEL: @xvsrlri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> 
@llvm.loongarch.lasx.xvsrlri.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrlri_h(v16i16 _1) { return __lasx_xvsrlri_h(_1, 1); } ++// CHECK-LABEL: @xvsrlri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrlri_w(v8i32 _1) { return __lasx_xvsrlri_w(_1, 1); } ++// CHECK-LABEL: @xvsrlri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrlri_d(v4i64 _1) { return __lasx_xvsrlri_d(_1, 1); } ++// CHECK-LABEL: @xvbitclr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvbitclr_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitclr_b(_1, _2); } ++// CHECK-LABEL: @xvbitclr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvbitclr_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitclr_h(_1, _2); } ++// CHECK-LABEL: @xvbitclr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvbitclr_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitclr_w(_1, _2); } ++// CHECK-LABEL: @xvbitclr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvbitclr_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitclr_d(_1, _2); } ++// CHECK-LABEL: @xvbitclri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvbitclri_b(v32u8 _1) { return __lasx_xvbitclri_b(_1, 1); } ++// CHECK-LABEL: @xvbitclri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvbitclri_h(v16u16 _1) { return __lasx_xvbitclri_h(_1, 1); } ++// CHECK-LABEL: @xvbitclri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvbitclri_w(v8u32 _1) { return __lasx_xvbitclri_w(_1, 1); } ++// CHECK-LABEL: @xvbitclri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvbitclri_d(v4u64 _1) { return __lasx_xvbitclri_d(_1, 1); } ++// CHECK-LABEL: @xvbitset_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvbitset_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitset_b(_1, _2); } ++// CHECK-LABEL: @xvbitset_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> [[_1:%.*]], 
<16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvbitset_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitset_h(_1, _2); } ++// CHECK-LABEL: @xvbitset_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvbitset_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitset_w(_1, _2); } ++// CHECK-LABEL: @xvbitset_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvbitset_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitset_d(_1, _2); } ++// CHECK-LABEL: @xvbitseti_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvbitseti_b(v32u8 _1) { return __lasx_xvbitseti_b(_1, 1); } ++// CHECK-LABEL: @xvbitseti_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvbitseti_h(v16u16 _1) { return __lasx_xvbitseti_h(_1, 1); } ++// CHECK-LABEL: @xvbitseti_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvbitseti_w(v8u32 _1) { return __lasx_xvbitseti_w(_1, 1); } ++// CHECK-LABEL: @xvbitseti_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvbitseti_d(v4u64 _1) { return __lasx_xvbitseti_d(_1, 1); } ++// CHECK-LABEL: @xvbitrev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvbitrev_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitrev_b(_1, _2); } ++// CHECK-LABEL: @xvbitrev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvbitrev_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitrev_h(_1, _2); } ++// CHECK-LABEL: @xvbitrev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvbitrev_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitrev_w(_1, _2); } ++// CHECK-LABEL: @xvbitrev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvbitrev_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitrev_d(_1, _2); } ++// CHECK-LABEL: @xvbitrevi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvbitrevi_b(v32u8 _1) { return __lasx_xvbitrevi_b(_1, 1); } ++// CHECK-LABEL: @xvbitrevi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x 
i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvbitrevi_h(v16u16 _1) { return __lasx_xvbitrevi_h(_1, 1); } ++// CHECK-LABEL: @xvbitrevi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvbitrevi_w(v8u32 _1) { return __lasx_xvbitrevi_w(_1, 1); } ++// CHECK-LABEL: @xvbitrevi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvbitrevi_d(v4u64 _1) { return __lasx_xvbitrevi_d(_1, 1); } ++// CHECK-LABEL: @xvadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvadd_b(v32i8 _1, v32i8 _2) { return __lasx_xvadd_b(_1, _2); } ++// CHECK-LABEL: @xvadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvadd_h(v16i16 _1, v16i16 _2) { return __lasx_xvadd_h(_1, _2); } ++// CHECK-LABEL: @xvadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvadd_w(v8i32 _1, v8i32 _2) { return __lasx_xvadd_w(_1, _2); } ++// CHECK-LABEL: @xvadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvadd_d(v4i64 _1, v4i64 _2) { return __lasx_xvadd_d(_1, _2); } ++// CHECK-LABEL: @xvaddi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvaddi_bu(v32i8 _1) { return __lasx_xvaddi_bu(_1, 1); } ++// CHECK-LABEL: @xvaddi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvaddi_hu(v16i16 _1) { return __lasx_xvaddi_hu(_1, 1); } ++// CHECK-LABEL: @xvaddi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvaddi_wu(v8i32 _1) { return __lasx_xvaddi_wu(_1, 1); } ++// CHECK-LABEL: @xvaddi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddi_du(v4i64 _1) { return __lasx_xvaddi_du(_1, 1); } ++// CHECK-LABEL: @xvsub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsub_b(v32i8 _1, v32i8 _2) { return __lasx_xvsub_b(_1, _2); } ++// CHECK-LABEL: @xvsub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsub_h(v16i16 _1, v16i16 _2) { 
return __lasx_xvsub_h(_1, _2); } ++// CHECK-LABEL: @xvsub_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsub_w(v8i32 _1, v8i32 _2) { return __lasx_xvsub_w(_1, _2); } ++// CHECK-LABEL: @xvsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsub_d(v4i64 _1, v4i64 _2) { return __lasx_xvsub_d(_1, _2); } ++// CHECK-LABEL: @xvsubi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsubi_bu(v32i8 _1) { return __lasx_xvsubi_bu(_1, 1); } ++// CHECK-LABEL: @xvsubi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsubi_hu(v16i16 _1) { return __lasx_xvsubi_hu(_1, 1); } ++// CHECK-LABEL: @xvsubi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsubi_wu(v8i32 _1) { return __lasx_xvsubi_wu(_1, 1); } ++// CHECK-LABEL: @xvsubi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubi_du(v4i64 _1) { return __lasx_xvsubi_du(_1, 1); } ++// CHECK-LABEL: @xvmax_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmax_b(v32i8 _1, v32i8 _2) { return __lasx_xvmax_b(_1, _2); } ++// CHECK-LABEL: @xvmax_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmax_h(v16i16 _1, v16i16 _2) { return __lasx_xvmax_h(_1, _2); } ++// CHECK-LABEL: @xvmax_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmax_w(v8i32 _1, v8i32 _2) { return __lasx_xvmax_w(_1, _2); } ++// CHECK-LABEL: @xvmax_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmax_d(v4i64 _1, v4i64 _2) { return __lasx_xvmax_d(_1, _2); } ++// CHECK-LABEL: @xvmaxi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmaxi_b(v32i8 _1) { return __lasx_xvmaxi_b(_1, 1); } ++// CHECK-LABEL: @xvmaxi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmaxi_h(v16i16 _1) { return __lasx_xvmaxi_h(_1, 1); } ++// CHECK-LABEL: @xvmaxi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvmaxi.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmaxi_w(v8i32 _1) { return __lasx_xvmaxi_w(_1, 1); } ++// CHECK-LABEL: @xvmaxi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaxi_d(v4i64 _1) { return __lasx_xvmaxi_d(_1, 1); } ++// CHECK-LABEL: @xvmax_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvmax_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmax_bu(_1, _2); } ++// CHECK-LABEL: @xvmax_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvmax_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmax_hu(_1, _2); } ++// CHECK-LABEL: @xvmax_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvmax_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmax_wu(_1, _2); } ++// CHECK-LABEL: @xvmax_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmax_du(v4u64 _1, v4u64 _2) { return __lasx_xvmax_du(_1, _2); } ++// CHECK-LABEL: @xvmaxi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvmaxi_bu(v32u8 _1) { return __lasx_xvmaxi_bu(_1, 1); } ++// CHECK-LABEL: @xvmaxi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvmaxi_hu(v16u16 _1) { return __lasx_xvmaxi_hu(_1, 1); } ++// CHECK-LABEL: @xvmaxi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvmaxi_wu(v8u32 _1) { return __lasx_xvmaxi_wu(_1, 1); } ++// CHECK-LABEL: @xvmaxi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmaxi_du(v4u64 _1) { return __lasx_xvmaxi_du(_1, 1); } ++// CHECK-LABEL: @xvmin_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmin_b(v32i8 _1, v32i8 _2) { return __lasx_xvmin_b(_1, _2); } ++// CHECK-LABEL: @xvmin_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmin_h(v16i16 _1, v16i16 _2) { return __lasx_xvmin_h(_1, _2); } ++// CHECK-LABEL: @xvmin_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// 
++v8i32 xvmin_w(v8i32 _1, v8i32 _2) { return __lasx_xvmin_w(_1, _2); } ++// CHECK-LABEL: @xvmin_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmin_d(v4i64 _1, v4i64 _2) { return __lasx_xvmin_d(_1, _2); } ++// CHECK-LABEL: @xvmini_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmini_b(v32i8 _1) { return __lasx_xvmini_b(_1, 1); } ++// CHECK-LABEL: @xvmini_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmini_h(v16i16 _1) { return __lasx_xvmini_h(_1, 1); } ++// CHECK-LABEL: @xvmini_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmini_w(v8i32 _1) { return __lasx_xvmini_w(_1, 1); } ++// CHECK-LABEL: @xvmini_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmini_d(v4i64 _1) { return __lasx_xvmini_d(_1, 1); } ++// CHECK-LABEL: @xvmin_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvmin_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmin_bu(_1, _2); } ++// CHECK-LABEL: @xvmin_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvmin_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmin_hu(_1, _2); } ++// CHECK-LABEL: @xvmin_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvmin_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmin_wu(_1, _2); } ++// CHECK-LABEL: @xvmin_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmin_du(v4u64 _1, v4u64 _2) { return __lasx_xvmin_du(_1, _2); } ++// CHECK-LABEL: @xvmini_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvmini_bu(v32u8 _1) { return __lasx_xvmini_bu(_1, 1); } ++// CHECK-LABEL: @xvmini_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvmini_hu(v16u16 _1) { return __lasx_xvmini_hu(_1, 1); } ++// CHECK-LABEL: @xvmini_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvmini_wu(v8u32 _1) { return __lasx_xvmini_wu(_1, 1); } ++// CHECK-LABEL: @xvmini_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmini_du(v4u64 _1) { return __lasx_xvmini_du(_1, 1); } ++// CHECK-LABEL: @xvseq_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvseq_b(v32i8 _1, v32i8 _2) { return __lasx_xvseq_b(_1, _2); } ++// CHECK-LABEL: @xvseq_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvseq_h(v16i16 _1, v16i16 _2) { return __lasx_xvseq_h(_1, _2); } ++// CHECK-LABEL: @xvseq_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvseq_w(v8i32 _1, v8i32 _2) { return __lasx_xvseq_w(_1, _2); } ++// CHECK-LABEL: @xvseq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvseq_d(v4i64 _1, v4i64 _2) { return __lasx_xvseq_d(_1, _2); } ++// CHECK-LABEL: @xvseqi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvseqi_b(v32i8 _1) { return __lasx_xvseqi_b(_1, 1); } ++// CHECK-LABEL: @xvseqi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvseqi_h(v16i16 _1) { return __lasx_xvseqi_h(_1, 1); } ++// CHECK-LABEL: @xvseqi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvseqi_w(v8i32 _1) { return __lasx_xvseqi_w(_1, 1); } ++// CHECK-LABEL: @xvseqi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvseqi_d(v4i64 _1) { return __lasx_xvseqi_d(_1, 1); } ++// CHECK-LABEL: @xvslt_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvslt_b(v32i8 _1, v32i8 _2) { return __lasx_xvslt_b(_1, _2); } ++// CHECK-LABEL: @xvslt_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvslt_h(v16i16 _1, v16i16 _2) { return __lasx_xvslt_h(_1, _2); } ++// CHECK-LABEL: @xvslt_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvslt_w(v8i32 _1, v8i32 _2) { return __lasx_xvslt_w(_1, _2); } ++// CHECK-LABEL: @xvslt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: 
ret <4 x i64> [[TMP0]] ++// ++v4i64 xvslt_d(v4i64 _1, v4i64 _2) { return __lasx_xvslt_d(_1, _2); } ++// CHECK-LABEL: @xvslti_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvslti_b(v32i8 _1) { return __lasx_xvslti_b(_1, 1); } ++// CHECK-LABEL: @xvslti_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvslti_h(v16i16 _1) { return __lasx_xvslti_h(_1, 1); } ++// CHECK-LABEL: @xvslti_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvslti_w(v8i32 _1) { return __lasx_xvslti_w(_1, 1); } ++// CHECK-LABEL: @xvslti_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvslti_d(v4i64 _1) { return __lasx_xvslti_d(_1, 1); } ++// CHECK-LABEL: @xvslt_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvslt_bu(v32u8 _1, v32u8 _2) { return __lasx_xvslt_bu(_1, _2); } ++// CHECK-LABEL: @xvslt_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvslt_hu(v16u16 _1, v16u16 _2) { return __lasx_xvslt_hu(_1, _2); } ++// CHECK-LABEL: @xvslt_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvslt_wu(v8u32 _1, v8u32 _2) { return __lasx_xvslt_wu(_1, _2); } ++// CHECK-LABEL: @xvslt_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvslt_du(v4u64 _1, v4u64 _2) { return __lasx_xvslt_du(_1, _2); } ++// CHECK-LABEL: @xvslti_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvslti_bu(v32u8 _1) { return __lasx_xvslti_bu(_1, 1); } ++// CHECK-LABEL: @xvslti_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvslti_hu(v16u16 _1) { return __lasx_xvslti_hu(_1, 1); } ++// CHECK-LABEL: @xvslti_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvslti_wu(v8u32 _1) { return __lasx_xvslti_wu(_1, 1); } ++// CHECK-LABEL: @xvslti_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvslti_du(v4u64 _1) { return __lasx_xvslti_du(_1, 1); } ++// CHECK-LABEL: @xvsle_b( ++// CHECK-NEXT: entry: ++// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsle_b(v32i8 _1, v32i8 _2) { return __lasx_xvsle_b(_1, _2); } ++// CHECK-LABEL: @xvsle_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsle_h(v16i16 _1, v16i16 _2) { return __lasx_xvsle_h(_1, _2); } ++// CHECK-LABEL: @xvsle_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsle_w(v8i32 _1, v8i32 _2) { return __lasx_xvsle_w(_1, _2); } ++// CHECK-LABEL: @xvsle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsle_d(v4i64 _1, v4i64 _2) { return __lasx_xvsle_d(_1, _2); } ++// CHECK-LABEL: @xvslei_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvslei_b(v32i8 _1) { return __lasx_xvslei_b(_1, 1); } ++// CHECK-LABEL: @xvslei_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvslei_h(v16i16 _1) { return __lasx_xvslei_h(_1, 1); } ++// CHECK-LABEL: @xvslei_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvslei_w(v8i32 _1) { return __lasx_xvslei_w(_1, 1); } ++// CHECK-LABEL: @xvslei_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvslei_d(v4i64 _1) { return __lasx_xvslei_d(_1, 1); } ++// CHECK-LABEL: @xvsle_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsle_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsle_bu(_1, _2); } ++// CHECK-LABEL: @xvsle_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsle_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsle_hu(_1, _2); } ++// CHECK-LABEL: @xvsle_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsle_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsle_wu(_1, _2); } ++// CHECK-LABEL: @xvsle_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsle_du(v4u64 _1, v4u64 _2) { return __lasx_xvsle_du(_1, _2); } ++// CHECK-LABEL: @xvslei_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> 
[[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvslei_bu(v32u8 _1) { return __lasx_xvslei_bu(_1, 1); } ++// CHECK-LABEL: @xvslei_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvslei_hu(v16u16 _1) { return __lasx_xvslei_hu(_1, 1); } ++// CHECK-LABEL: @xvslei_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvslei_wu(v8u32 _1) { return __lasx_xvslei_wu(_1, 1); } ++// CHECK-LABEL: @xvslei_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvslei_du(v4u64 _1) { return __lasx_xvslei_du(_1, 1); } ++// CHECK-LABEL: @xvsat_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsat_b(v32i8 _1) { return __lasx_xvsat_b(_1, 1); } ++// CHECK-LABEL: @xvsat_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsat_h(v16i16 _1) { return __lasx_xvsat_h(_1, 1); } ++// CHECK-LABEL: @xvsat_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsat_w(v8i32 _1) { return __lasx_xvsat_w(_1, 1); } ++// CHECK-LABEL: @xvsat_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsat_d(v4i64 _1) { return __lasx_xvsat_d(_1, 1); } ++// CHECK-LABEL: @xvsat_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvsat_bu(v32u8 _1) { return __lasx_xvsat_bu(_1, 1); } ++// CHECK-LABEL: @xvsat_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvsat_hu(v16u16 _1) { return __lasx_xvsat_hu(_1, 1); } ++// CHECK-LABEL: @xvsat_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvsat_wu(v8u32 _1) { return __lasx_xvsat_wu(_1, 1); } ++// CHECK-LABEL: @xvsat_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvsat_du(v4u64 _1) { return __lasx_xvsat_du(_1, 1); } ++// CHECK-LABEL: @xvadda_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvadda_b(v32i8 _1, v32i8 _2) { return __lasx_xvadda_b(_1, _2); } ++// CHECK-LABEL: @xvadda_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> 
@llvm.loongarch.lasx.xvadda.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvadda_h(v16i16 _1, v16i16 _2) { return __lasx_xvadda_h(_1, _2); } ++// CHECK-LABEL: @xvadda_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvadda_w(v8i32 _1, v8i32 _2) { return __lasx_xvadda_w(_1, _2); } ++// CHECK-LABEL: @xvadda_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvadda_d(v4i64 _1, v4i64 _2) { return __lasx_xvadda_d(_1, _2); } ++// CHECK-LABEL: @xvsadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsadd_b(v32i8 _1, v32i8 _2) { return __lasx_xvsadd_b(_1, _2); } ++// CHECK-LABEL: @xvsadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsadd_h(v16i16 _1, v16i16 _2) { return __lasx_xvsadd_h(_1, _2); } ++// CHECK-LABEL: @xvsadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsadd_w(v8i32 _1, v8i32 _2) { return __lasx_xvsadd_w(_1, _2); } ++// CHECK-LABEL: @xvsadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsadd_d(v4i64 _1, v4i64 _2) { return __lasx_xvsadd_d(_1, _2); } ++// CHECK-LABEL: @xvsadd_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvsadd_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsadd_bu(_1, _2); } ++// CHECK-LABEL: @xvsadd_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvsadd_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsadd_hu(_1, _2); } ++// CHECK-LABEL: @xvsadd_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvsadd_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsadd_wu(_1, _2); } ++// CHECK-LABEL: @xvsadd_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvsadd_du(v4u64 _1, v4u64 _2) { return __lasx_xvsadd_du(_1, _2); } ++// CHECK-LABEL: @xvavg_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvavg_b(v32i8 _1, v32i8 _2) { return __lasx_xvavg_b(_1, _2); } ++// CHECK-LABEL: @xvavg_h( ++// CHECK-NEXT: entry: ++// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvavg_h(v16i16 _1, v16i16 _2) { return __lasx_xvavg_h(_1, _2); } ++// CHECK-LABEL: @xvavg_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvavg_w(v8i32 _1, v8i32 _2) { return __lasx_xvavg_w(_1, _2); } ++// CHECK-LABEL: @xvavg_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvavg_d(v4i64 _1, v4i64 _2) { return __lasx_xvavg_d(_1, _2); } ++// CHECK-LABEL: @xvavg_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvavg_bu(v32u8 _1, v32u8 _2) { return __lasx_xvavg_bu(_1, _2); } ++// CHECK-LABEL: @xvavg_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvavg_hu(v16u16 _1, v16u16 _2) { return __lasx_xvavg_hu(_1, _2); } ++// CHECK-LABEL: @xvavg_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvavg_wu(v8u32 _1, v8u32 _2) { return __lasx_xvavg_wu(_1, _2); } ++// CHECK-LABEL: @xvavg_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvavg_du(v4u64 _1, v4u64 _2) { return __lasx_xvavg_du(_1, _2); } ++// CHECK-LABEL: @xvavgr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvavgr_b(v32i8 _1, v32i8 _2) { return __lasx_xvavgr_b(_1, _2); } ++// CHECK-LABEL: @xvavgr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvavgr_h(v16i16 _1, v16i16 _2) { return __lasx_xvavgr_h(_1, _2); } ++// CHECK-LABEL: @xvavgr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvavgr_w(v8i32 _1, v8i32 _2) { return __lasx_xvavgr_w(_1, _2); } ++// CHECK-LABEL: @xvavgr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvavgr_d(v4i64 _1, v4i64 _2) { return __lasx_xvavgr_d(_1, _2); } ++// CHECK-LABEL: @xvavgr_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvavgr_bu(v32u8 _1, v32u8 _2) { return __lasx_xvavgr_bu(_1, _2); } ++// CHECK-LABEL: @xvavgr_hu( ++// 
CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvavgr_hu(v16u16 _1, v16u16 _2) { return __lasx_xvavgr_hu(_1, _2); } ++// CHECK-LABEL: @xvavgr_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvavgr_wu(v8u32 _1, v8u32 _2) { return __lasx_xvavgr_wu(_1, _2); } ++// CHECK-LABEL: @xvavgr_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvavgr_du(v4u64 _1, v4u64 _2) { return __lasx_xvavgr_du(_1, _2); } ++// CHECK-LABEL: @xvssub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssub_b(v32i8 _1, v32i8 _2) { return __lasx_xvssub_b(_1, _2); } ++// CHECK-LABEL: @xvssub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssub_h(v16i16 _1, v16i16 _2) { return __lasx_xvssub_h(_1, _2); } ++// CHECK-LABEL: @xvssub_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssub_w(v8i32 _1, v8i32 _2) { return __lasx_xvssub_w(_1, _2); } ++// CHECK-LABEL: @xvssub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvssub_d(v4i64 _1, v4i64 _2) { return __lasx_xvssub_d(_1, _2); } ++// CHECK-LABEL: @xvssub_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssub_bu(v32u8 _1, v32u8 _2) { return __lasx_xvssub_bu(_1, _2); } ++// CHECK-LABEL: @xvssub_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssub_hu(v16u16 _1, v16u16 _2) { return __lasx_xvssub_hu(_1, _2); } ++// CHECK-LABEL: @xvssub_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssub_wu(v8u32 _1, v8u32 _2) { return __lasx_xvssub_wu(_1, _2); } ++// CHECK-LABEL: @xvssub_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvssub_du(v4u64 _1, v4u64 _2) { return __lasx_xvssub_du(_1, _2); } ++// CHECK-LABEL: @xvabsd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvabsd_b(v32i8 _1, v32i8 _2) { return 
__lasx_xvabsd_b(_1, _2); } ++// CHECK-LABEL: @xvabsd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvabsd_h(v16i16 _1, v16i16 _2) { return __lasx_xvabsd_h(_1, _2); } ++// CHECK-LABEL: @xvabsd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvabsd_w(v8i32 _1, v8i32 _2) { return __lasx_xvabsd_w(_1, _2); } ++// CHECK-LABEL: @xvabsd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvabsd_d(v4i64 _1, v4i64 _2) { return __lasx_xvabsd_d(_1, _2); } ++// CHECK-LABEL: @xvabsd_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvabsd_bu(v32u8 _1, v32u8 _2) { return __lasx_xvabsd_bu(_1, _2); } ++// CHECK-LABEL: @xvabsd_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvabsd_hu(v16u16 _1, v16u16 _2) { return __lasx_xvabsd_hu(_1, _2); } ++// CHECK-LABEL: @xvabsd_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvabsd_wu(v8u32 _1, v8u32 _2) { return __lasx_xvabsd_wu(_1, _2); } ++// CHECK-LABEL: @xvabsd_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvabsd_du(v4u64 _1, v4u64 _2) { return __lasx_xvabsd_du(_1, _2); } ++// CHECK-LABEL: @xvmul_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmul_b(v32i8 _1, v32i8 _2) { return __lasx_xvmul_b(_1, _2); } ++// CHECK-LABEL: @xvmul_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmul_h(v16i16 _1, v16i16 _2) { return __lasx_xvmul_h(_1, _2); } ++// CHECK-LABEL: @xvmul_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmul_w(v8i32 _1, v8i32 _2) { return __lasx_xvmul_w(_1, _2); } ++// CHECK-LABEL: @xvmul_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmul_d(v4i64 _1, v4i64 _2) { return __lasx_xvmul_d(_1, _2); } ++// CHECK-LABEL: @xvmadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <32 x i8> 
[[TMP0]] ++// ++v32i8 xvmadd_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmadd_b(_1, _2, _3); } ++// CHECK-LABEL: @xvmadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmadd_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmadd_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmadd_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmadd_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmadd_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmadd_d(_1, _2, _3); } ++// CHECK-LABEL: @xvmsub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmsub_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmsub_b(_1, _2, _3); } ++// CHECK-LABEL: @xvmsub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmsub_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmsub_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmsub_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmsub_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmsub_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmsub_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmsub_d(_1, _2, _3); } ++// CHECK-LABEL: @xvdiv_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvdiv_b(v32i8 _1, v32i8 _2) { return __lasx_xvdiv_b(_1, _2); } ++// CHECK-LABEL: @xvdiv_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvdiv_h(v16i16 _1, v16i16 _2) { return __lasx_xvdiv_h(_1, _2); } ++// CHECK-LABEL: @xvdiv_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvdiv_w(v8i32 _1, v8i32 _2) { return __lasx_xvdiv_w(_1, _2); } ++// CHECK-LABEL: @xvdiv_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] 
++// ++v4i64 xvdiv_d(v4i64 _1, v4i64 _2) { return __lasx_xvdiv_d(_1, _2); } ++// CHECK-LABEL: @xvdiv_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvdiv_bu(v32u8 _1, v32u8 _2) { return __lasx_xvdiv_bu(_1, _2); } ++// CHECK-LABEL: @xvdiv_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvdiv_hu(v16u16 _1, v16u16 _2) { return __lasx_xvdiv_hu(_1, _2); } ++// CHECK-LABEL: @xvdiv_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvdiv_wu(v8u32 _1, v8u32 _2) { return __lasx_xvdiv_wu(_1, _2); } ++// CHECK-LABEL: @xvdiv_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvdiv_du(v4u64 _1, v4u64 _2) { return __lasx_xvdiv_du(_1, _2); } ++// CHECK-LABEL: @xvhaddw_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvhaddw_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvhaddw_h_b(_1, _2); } ++// CHECK-LABEL: @xvhaddw_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvhaddw_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvhaddw_w_h(_1, _2); } ++// CHECK-LABEL: @xvhaddw_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvhaddw_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvhaddw_d_w(_1, _2); } ++// CHECK-LABEL: @xvhaddw_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvhaddw_hu_bu(v32u8 _1, v32u8 _2) { return __lasx_xvhaddw_hu_bu(_1, _2); } ++// CHECK-LABEL: @xvhaddw_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvhaddw_wu_hu(v16u16 _1, v16u16 _2) { return __lasx_xvhaddw_wu_hu(_1, _2); } ++// CHECK-LABEL: @xvhaddw_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvhaddw_du_wu(v8u32 _1, v8u32 _2) { return __lasx_xvhaddw_du_wu(_1, _2); } ++// CHECK-LABEL: @xvhsubw_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvhsubw_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvhsubw_h_b(_1, _2); } ++// CHECK-LABEL: @xvhsubw_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvhsubw_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvhsubw_w_h(_1, _2); } ++// CHECK-LABEL: @xvhsubw_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvhsubw_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvhsubw_d_w(_1, _2); } ++// CHECK-LABEL: @xvhsubw_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvhsubw_hu_bu(v32u8 _1, v32u8 _2) { return __lasx_xvhsubw_hu_bu(_1, _2); } ++// CHECK-LABEL: @xvhsubw_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvhsubw_wu_hu(v16u16 _1, v16u16 _2) { return __lasx_xvhsubw_wu_hu(_1, _2); } ++// CHECK-LABEL: @xvhsubw_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvhsubw_du_wu(v8u32 _1, v8u32 _2) { return __lasx_xvhsubw_du_wu(_1, _2); } ++// CHECK-LABEL: @xvmod_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmod_b(v32i8 _1, v32i8 _2) { return __lasx_xvmod_b(_1, _2); } ++// CHECK-LABEL: @xvmod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmod_h(v16i16 _1, v16i16 _2) { return __lasx_xvmod_h(_1, _2); } ++// CHECK-LABEL: @xvmod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmod_w(v8i32 _1, v8i32 _2) { return __lasx_xvmod_w(_1, _2); } ++// CHECK-LABEL: @xvmod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmod_d(v4i64 _1, v4i64 _2) { return __lasx_xvmod_d(_1, _2); } ++// CHECK-LABEL: @xvmod_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvmod_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmod_bu(_1, _2); } ++// CHECK-LABEL: @xvmod_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvmod_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmod_hu(_1, _2); } ++// CHECK-LABEL: @xvmod_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvmod_wu(v8u32 _1, v8u32 _2) { 
return __lasx_xvmod_wu(_1, _2); } ++// CHECK-LABEL: @xvmod_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmod_du(v4u64 _1, v4u64 _2) { return __lasx_xvmod_du(_1, _2); } ++// CHECK-LABEL: @xvrepl128vei_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvrepl128vei_b(v32i8 _1) { return __lasx_xvrepl128vei_b(_1, 1); } ++// CHECK-LABEL: @xvrepl128vei_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvrepl128vei_h(v16i16 _1) { return __lasx_xvrepl128vei_h(_1, 1); } ++// CHECK-LABEL: @xvrepl128vei_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvrepl128vei_w(v8i32 _1) { return __lasx_xvrepl128vei_w(_1, 1); } ++// CHECK-LABEL: @xvrepl128vei_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvrepl128vei_d(v4i64 _1) { return __lasx_xvrepl128vei_d(_1, 1); } ++// CHECK-LABEL: @xvpickev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvpickev_b(v32i8 _1, v32i8 _2) { return __lasx_xvpickev_b(_1, _2); } ++// CHECK-LABEL: @xvpickev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvpickev_h(v16i16 _1, v16i16 _2) { return __lasx_xvpickev_h(_1, _2); } ++// CHECK-LABEL: @xvpickev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvpickev_w(v8i32 _1, v8i32 _2) { return __lasx_xvpickev_w(_1, _2); } ++// CHECK-LABEL: @xvpickev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvpickev_d(v4i64 _1, v4i64 _2) { return __lasx_xvpickev_d(_1, _2); } ++// CHECK-LABEL: @xvpickod_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvpickod_b(v32i8 _1, v32i8 _2) { return __lasx_xvpickod_b(_1, _2); } ++// CHECK-LABEL: @xvpickod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvpickod_h(v16i16 _1, v16i16 _2) { return __lasx_xvpickod_h(_1, _2); } ++// CHECK-LABEL: @xvpickod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// 
CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvpickod_w(v8i32 _1, v8i32 _2) { return __lasx_xvpickod_w(_1, _2); } ++// CHECK-LABEL: @xvpickod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvpickod_d(v4i64 _1, v4i64 _2) { return __lasx_xvpickod_d(_1, _2); } ++// CHECK-LABEL: @xvilvh_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvilvh_b(v32i8 _1, v32i8 _2) { return __lasx_xvilvh_b(_1, _2); } ++// CHECK-LABEL: @xvilvh_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvilvh_h(v16i16 _1, v16i16 _2) { return __lasx_xvilvh_h(_1, _2); } ++// CHECK-LABEL: @xvilvh_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvilvh_w(v8i32 _1, v8i32 _2) { return __lasx_xvilvh_w(_1, _2); } ++// CHECK-LABEL: @xvilvh_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvilvh_d(v4i64 _1, v4i64 _2) { return __lasx_xvilvh_d(_1, _2); } ++// CHECK-LABEL: @xvilvl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvilvl_b(v32i8 _1, v32i8 _2) { return __lasx_xvilvl_b(_1, _2); } ++// CHECK-LABEL: @xvilvl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvilvl_h(v16i16 _1, v16i16 _2) { return __lasx_xvilvl_h(_1, _2); } ++// CHECK-LABEL: @xvilvl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvilvl_w(v8i32 _1, v8i32 _2) { return __lasx_xvilvl_w(_1, _2); } ++// CHECK-LABEL: @xvilvl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvilvl_d(v4i64 _1, v4i64 _2) { return __lasx_xvilvl_d(_1, _2); } ++// CHECK-LABEL: @xvpackev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvpackev_b(v32i8 _1, v32i8 _2) { return __lasx_xvpackev_b(_1, _2); } ++// CHECK-LABEL: @xvpackev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvpackev_h(v16i16 _1, v16i16 _2) { return __lasx_xvpackev_h(_1, _2); } ++// CHECK-LABEL: @xvpackev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvpackev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvpackev_w(v8i32 _1, v8i32 _2) { return __lasx_xvpackev_w(_1, _2); } ++// CHECK-LABEL: @xvpackev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvpackev_d(v4i64 _1, v4i64 _2) { return __lasx_xvpackev_d(_1, _2); } ++// CHECK-LABEL: @xvpackod_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvpackod_b(v32i8 _1, v32i8 _2) { return __lasx_xvpackod_b(_1, _2); } ++// CHECK-LABEL: @xvpackod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvpackod_h(v16i16 _1, v16i16 _2) { return __lasx_xvpackod_h(_1, _2); } ++// CHECK-LABEL: @xvpackod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvpackod_w(v8i32 _1, v8i32 _2) { return __lasx_xvpackod_w(_1, _2); } ++// CHECK-LABEL: @xvpackod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvpackod_d(v4i64 _1, v4i64 _2) { return __lasx_xvpackod_d(_1, _2); } ++// CHECK-LABEL: @xvshuf_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvshuf_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvshuf_b(_1, _2, _3); } ++// CHECK-LABEL: @xvshuf_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvshuf_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvshuf_h(_1, _2, _3); } ++// CHECK-LABEL: @xvshuf_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvshuf_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvshuf_w(_1, _2, _3); } ++// CHECK-LABEL: @xvshuf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvshuf_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvshuf_d(_1, _2, _3); } ++// CHECK-LABEL: @xvand_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvand_v(v32u8 _1, v32u8 _2) { return __lasx_xvand_v(_1, _2); } ++// CHECK-LABEL: @xvandi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x 
i8> [[TMP0]] ++// ++v32u8 xvandi_b(v32u8 _1) { return __lasx_xvandi_b(_1, 1); } ++// CHECK-LABEL: @xvor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvor_v(v32u8 _1, v32u8 _2) { return __lasx_xvor_v(_1, _2); } ++// CHECK-LABEL: @xvori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvori_b(v32u8 _1) { return __lasx_xvori_b(_1, 1); } ++// CHECK-LABEL: @xvnor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvnor_v(v32u8 _1, v32u8 _2) { return __lasx_xvnor_v(_1, _2); } ++// CHECK-LABEL: @xvnori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvnori_b(v32u8 _1) { return __lasx_xvnori_b(_1, 1); } ++// CHECK-LABEL: @xvxor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvxor_v(v32u8 _1, v32u8 _2) { return __lasx_xvxor_v(_1, _2); } ++// CHECK-LABEL: @xvxori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvxori_b(v32u8 _1) { return __lasx_xvxori_b(_1, 1); } ++// CHECK-LABEL: @xvbitsel_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvbitsel_v(v32u8 _1, v32u8 _2, v32u8 _3) { return __lasx_xvbitsel_v(_1, _2, _3); } ++// CHECK-LABEL: @xvbitseli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvbitseli_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitseli_b(_1, _2, 1); } ++// CHECK-LABEL: @xvshuf4i_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvshuf4i_b(v32i8 _1) { return __lasx_xvshuf4i_b(_1, 1); } ++// CHECK-LABEL: @xvshuf4i_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvshuf4i_h(v16i16 _1) { return __lasx_xvshuf4i_h(_1, 1); } ++// CHECK-LABEL: @xvshuf4i_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvshuf4i_w(v8i32 _1) { return __lasx_xvshuf4i_w(_1, 1); } ++// CHECK-LABEL: @xvreplgr2vr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvreplgr2vr_b(int _1) { return __lasx_xvreplgr2vr_b(_1); } ++// 
CHECK-LABEL: @xvreplgr2vr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvreplgr2vr_h(int _1) { return __lasx_xvreplgr2vr_h(_1); } ++// CHECK-LABEL: @xvreplgr2vr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvreplgr2vr_w(int _1) { return __lasx_xvreplgr2vr_w(_1); } ++// CHECK-LABEL: @xvreplgr2vr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[_1:%.*]] to i64 ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 [[CONV]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvreplgr2vr_d(int _1) { return __lasx_xvreplgr2vr_d(_1); } ++// CHECK-LABEL: @xvpcnt_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvpcnt_b(v32i8 _1) { return __lasx_xvpcnt_b(_1); } ++// CHECK-LABEL: @xvpcnt_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvpcnt_h(v16i16 _1) { return __lasx_xvpcnt_h(_1); } ++// CHECK-LABEL: @xvpcnt_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvpcnt_w(v8i32 _1) { return __lasx_xvpcnt_w(_1); } ++// CHECK-LABEL: @xvpcnt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvpcnt_d(v4i64 _1) { return __lasx_xvpcnt_d(_1); } ++// CHECK-LABEL: @xvclo_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvclo_b(v32i8 _1) { return __lasx_xvclo_b(_1); } ++// CHECK-LABEL: @xvclo_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvclo_h(v16i16 _1) { return __lasx_xvclo_h(_1); } ++// CHECK-LABEL: @xvclo_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvclo_w(v8i32 _1) { return __lasx_xvclo_w(_1); } ++// CHECK-LABEL: @xvclo_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvclo_d(v4i64 _1) { return __lasx_xvclo_d(_1); } ++// CHECK-LABEL: @xvclz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvclz_b(v32i8 _1) { return __lasx_xvclz_b(_1); } ++// CHECK-LABEL: @xvclz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvclz_h(v16i16 _1) { return __lasx_xvclz_h(_1); } ++// CHECK-LABEL: @xvclz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvclz_w(v8i32 _1) { return __lasx_xvclz_w(_1); } ++// CHECK-LABEL: @xvclz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvclz_d(v4i64 _1) { return __lasx_xvclz_d(_1); } ++// CHECK-LABEL: @xvfadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfadd_s(v8f32 _1, v8f32 _2) { return __lasx_xvfadd_s(_1, _2); } ++// CHECK-LABEL: @xvfadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfadd_d(v4f64 _1, v4f64 _2) { return __lasx_xvfadd_d(_1, _2); } ++// CHECK-LABEL: @xvfsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfsub_s(v8f32 _1, v8f32 _2) { return __lasx_xvfsub_s(_1, _2); } ++// CHECK-LABEL: @xvfsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfsub_d(v4f64 _1, v4f64 _2) { return __lasx_xvfsub_d(_1, _2); } ++// CHECK-LABEL: @xvfmul_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfmul_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmul_s(_1, _2); } ++// CHECK-LABEL: @xvfmul_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfmul_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmul_d(_1, _2); } ++// CHECK-LABEL: @xvfdiv_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfdiv_s(v8f32 _1, v8f32 _2) { return __lasx_xvfdiv_s(_1, _2); } ++// CHECK-LABEL: @xvfdiv_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfdiv_d(v4f64 _1, v4f64 _2) { return __lasx_xvfdiv_d(_1, _2); } ++// CHECK-LABEL: @xvfcvt_h_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvfcvt_h_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcvt_h_s(_1, _2); } ++// CHECK-LABEL: @xvfcvt_s_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfcvt_s_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcvt_s_d(_1, _2); } ++// CHECK-LABEL: 
@xvfmin_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfmin_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmin_s(_1, _2); } ++// CHECK-LABEL: @xvfmin_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfmin_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmin_d(_1, _2); } ++// CHECK-LABEL: @xvfmina_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfmina_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmina_s(_1, _2); } ++// CHECK-LABEL: @xvfmina_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfmina_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmina_d(_1, _2); } ++// CHECK-LABEL: @xvfmax_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfmax_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmax_s(_1, _2); } ++// CHECK-LABEL: @xvfmax_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfmax_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmax_d(_1, _2); } ++// CHECK-LABEL: @xvfmaxa_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfmaxa_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmaxa_s(_1, _2); } ++// CHECK-LABEL: @xvfmaxa_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfmaxa_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmaxa_d(_1, _2); } ++// CHECK-LABEL: @xvfclass_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfclass_s(v8f32 _1) { return __lasx_xvfclass_s(_1); } ++// CHECK-LABEL: @xvfclass_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfclass_d(v4f64 _1) { return __lasx_xvfclass_d(_1); } ++// CHECK-LABEL: @xvfsqrt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfsqrt_s(v8f32 _1) { return __lasx_xvfsqrt_s(_1); } ++// CHECK-LABEL: @xvfsqrt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfsqrt_d(v4f64 _1) { return __lasx_xvfsqrt_d(_1); } ++// 
CHECK-LABEL: @xvfrecip_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfrecip_s(v8f32 _1) { return __lasx_xvfrecip_s(_1); } ++// CHECK-LABEL: @xvfrecip_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfrecip_d(v4f64 _1) { return __lasx_xvfrecip_d(_1); } ++// CHECK-LABEL: @xvfrint_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfrint_s(v8f32 _1) { return __lasx_xvfrint_s(_1); } ++// CHECK-LABEL: @xvfrint_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfrint_d(v4f64 _1) { return __lasx_xvfrint_d(_1); } ++// CHECK-LABEL: @xvfrsqrt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfrsqrt_s(v8f32 _1) { return __lasx_xvfrsqrt_s(_1); } ++// CHECK-LABEL: @xvfrsqrt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfrsqrt_d(v4f64 _1) { return __lasx_xvfrsqrt_d(_1); } ++// CHECK-LABEL: @xvflogb_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvflogb_s(v8f32 _1) { return __lasx_xvflogb_s(_1); } ++// CHECK-LABEL: @xvflogb_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvflogb_d(v4f64 _1) { return __lasx_xvflogb_d(_1); } ++// CHECK-LABEL: @xvfcvth_s_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfcvth_s_h(v16i16 _1) { return __lasx_xvfcvth_s_h(_1); } ++// CHECK-LABEL: @xvfcvth_d_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfcvth_d_s(v8f32 _1) { return __lasx_xvfcvth_d_s(_1); } ++// CHECK-LABEL: @xvfcvtl_s_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfcvtl_s_h(v16i16 _1) { return __lasx_xvfcvtl_s_h(_1); } ++// CHECK-LABEL: @xvfcvtl_d_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfcvtl_d_s(v8f32 _1) { return __lasx_xvfcvtl_d_s(_1); } ++// CHECK-LABEL: @xvftint_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftint_w_s(v8f32 _1) { 
return __lasx_xvftint_w_s(_1); } ++// CHECK-LABEL: @xvftint_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftint_l_d(v4f64 _1) { return __lasx_xvftint_l_d(_1); } ++// CHECK-LABEL: @xvftint_wu_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvftint_wu_s(v8f32 _1) { return __lasx_xvftint_wu_s(_1); } ++// CHECK-LABEL: @xvftint_lu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvftint_lu_d(v4f64 _1) { return __lasx_xvftint_lu_d(_1); } ++// CHECK-LABEL: @xvftintrz_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftintrz_w_s(v8f32 _1) { return __lasx_xvftintrz_w_s(_1); } ++// CHECK-LABEL: @xvftintrz_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrz_l_d(v4f64 _1) { return __lasx_xvftintrz_l_d(_1); } ++// CHECK-LABEL: @xvftintrz_wu_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvftintrz_wu_s(v8f32 _1) { return __lasx_xvftintrz_wu_s(_1); } ++// CHECK-LABEL: @xvftintrz_lu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvftintrz_lu_d(v4f64 _1) { return __lasx_xvftintrz_lu_d(_1); } ++// CHECK-LABEL: @xvffint_s_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvffint_s_w(v8i32 _1) { return __lasx_xvffint_s_w(_1); } ++// CHECK-LABEL: @xvffint_d_l( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvffint_d_l(v4i64 _1) { return __lasx_xvffint_d_l(_1); } ++// CHECK-LABEL: @xvffint_s_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvffint_s_wu(v8u32 _1) { return __lasx_xvffint_s_wu(_1); } ++// CHECK-LABEL: @xvffint_d_lu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvffint_d_lu(v4u64 _1) { return __lasx_xvffint_d_lu(_1); } ++// CHECK-LABEL: @xvreplve_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_1:%.*]], i32 [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvreplve_b(v32i8 _1, int _2) { return __lasx_xvreplve_b(_1, _2); } ++// CHECK-LABEL: @xvreplve_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> 
@llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_1:%.*]], i32 [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvreplve_h(v16i16 _1, int _2) { return __lasx_xvreplve_h(_1, _2); } ++// CHECK-LABEL: @xvreplve_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_1:%.*]], i32 [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvreplve_w(v8i32 _1, int _2) { return __lasx_xvreplve_w(_1, _2); } ++// CHECK-LABEL: @xvreplve_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1:%.*]], i32 [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvreplve_d(v4i64 _1, int _2) { return __lasx_xvreplve_d(_1, _2); } ++// CHECK-LABEL: @xvpermi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvpermi_w(v8i32 _1, v8i32 _2) { return __lasx_xvpermi_w(_1, _2, 1); } ++// CHECK-LABEL: @xvandn_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvandn_v(v32u8 _1, v32u8 _2) { return __lasx_xvandn_v(_1, _2); } ++// CHECK-LABEL: @xvneg_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvneg_b(v32i8 _1) { return __lasx_xvneg_b(_1); } ++// CHECK-LABEL: @xvneg_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvneg_h(v16i16 _1) { return __lasx_xvneg_h(_1); } ++// CHECK-LABEL: @xvneg_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvneg_w(v8i32 _1) { return __lasx_xvneg_w(_1); } ++// CHECK-LABEL: @xvneg_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvneg_d(v4i64 _1) { return __lasx_xvneg_d(_1); } ++// CHECK-LABEL: @xvmuh_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmuh_b(v32i8 _1, v32i8 _2) { return __lasx_xvmuh_b(_1, _2); } ++// CHECK-LABEL: @xvmuh_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmuh_h(v16i16 _1, v16i16 _2) { return __lasx_xvmuh_h(_1, _2); } ++// CHECK-LABEL: @xvmuh_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmuh_w(v8i32 _1, v8i32 _2) { return __lasx_xvmuh_w(_1, _2); } ++// CHECK-LABEL: @xvmuh_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 
xvmuh_d(v4i64 _1, v4i64 _2) { return __lasx_xvmuh_d(_1, _2); } ++// CHECK-LABEL: @xvmuh_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvmuh_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmuh_bu(_1, _2); } ++// CHECK-LABEL: @xvmuh_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvmuh_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmuh_hu(_1, _2); } ++// CHECK-LABEL: @xvmuh_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvmuh_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmuh_wu(_1, _2); } ++// CHECK-LABEL: @xvmuh_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmuh_du(v4u64 _1, v4u64 _2) { return __lasx_xvmuh_du(_1, _2); } ++// CHECK-LABEL: @xvsllwil_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsllwil_h_b(v32i8 _1) { return __lasx_xvsllwil_h_b(_1, 1); } ++// CHECK-LABEL: @xvsllwil_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsllwil_w_h(v16i16 _1) { return __lasx_xvsllwil_w_h(_1, 1); } ++// CHECK-LABEL: @xvsllwil_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsllwil_d_w(v8i32 _1) { return __lasx_xvsllwil_d_w(_1, 1); } ++// CHECK-LABEL: @xvsllwil_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvsllwil_hu_bu(v32u8 _1) { return __lasx_xvsllwil_hu_bu(_1, 1); } ++// CHECK-LABEL: @xvsllwil_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvsllwil_wu_hu(v16u16 _1) { return __lasx_xvsllwil_wu_hu(_1, 1); } ++// CHECK-LABEL: @xvsllwil_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvsllwil_du_wu(v8u32 _1) { return __lasx_xvsllwil_du_wu(_1, 1); } ++// CHECK-LABEL: @xvsran_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsran_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsran_b_h(_1, _2); } ++// CHECK-LABEL: @xvsran_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// 
++v16i16 xvsran_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsran_h_w(_1, _2); } ++// CHECK-LABEL: @xvsran_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsran_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsran_w_d(_1, _2); } ++// CHECK-LABEL: @xvssran_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssran_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssran_b_h(_1, _2); } ++// CHECK-LABEL: @xvssran_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssran_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssran_h_w(_1, _2); } ++// CHECK-LABEL: @xvssran_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssran_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssran_w_d(_1, _2); } ++// CHECK-LABEL: @xvssran_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssran_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssran_bu_h(_1, _2); } ++// CHECK-LABEL: @xvssran_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssran_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssran_hu_w(_1, _2); } ++// CHECK-LABEL: @xvssran_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssran_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssran_wu_d(_1, _2); } ++// CHECK-LABEL: @xvsrarn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrarn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrarn_b_h(_1, _2); } ++// CHECK-LABEL: @xvsrarn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrarn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrarn_h_w(_1, _2); } ++// CHECK-LABEL: @xvsrarn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrarn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrarn_w_d(_1, _2); } ++// CHECK-LABEL: @xvssrarn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssrarn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrarn_b_h(_1, _2); } ++// CHECK-LABEL: @xvssrarn_h_w( ++// 
CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssrarn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrarn_h_w(_1, _2); } ++// CHECK-LABEL: @xvssrarn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssrarn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrarn_w_d(_1, _2); } ++// CHECK-LABEL: @xvssrarn_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssrarn_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrarn_bu_h(_1, _2); } ++// CHECK-LABEL: @xvssrarn_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssrarn_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrarn_hu_w(_1, _2); } ++// CHECK-LABEL: @xvssrarn_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssrarn_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrarn_wu_d(_1, _2); } ++// CHECK-LABEL: @xvsrln_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrln_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrln_b_h(_1, _2); } ++// CHECK-LABEL: @xvsrln_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrln_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrln_h_w(_1, _2); } ++// CHECK-LABEL: @xvsrln_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrln_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrln_w_d(_1, _2); } ++// CHECK-LABEL: @xvssrln_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssrln_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrln_bu_h(_1, _2); } ++// CHECK-LABEL: @xvssrln_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssrln_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrln_hu_w(_1, _2); } ++// CHECK-LABEL: @xvssrln_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssrln_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrln_wu_d(_1, _2); } ++// CHECK-LABEL: @xvsrlrn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> 
@llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrlrn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrlrn_b_h(_1, _2); } ++// CHECK-LABEL: @xvsrlrn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrlrn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrlrn_h_w(_1, _2); } ++// CHECK-LABEL: @xvsrlrn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrlrn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrlrn_w_d(_1, _2); } ++// CHECK-LABEL: @xvssrlrn_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssrlrn_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrlrn_bu_h(_1, _2); } ++// CHECK-LABEL: @xvssrlrn_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssrlrn_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrlrn_hu_w(_1, _2); } ++// CHECK-LABEL: @xvssrlrn_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssrlrn_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrlrn_wu_d(_1, _2); } ++// CHECK-LABEL: @xvfrstpi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2) { return __lasx_xvfrstpi_b(_1, _2, 1); } ++// CHECK-LABEL: @xvfrstpi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2) { return __lasx_xvfrstpi_h(_1, _2, 1); } ++// CHECK-LABEL: @xvfrstp_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvfrstp_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvfrstp_b(_1, _2, _3); } ++// CHECK-LABEL: @xvfrstp_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvfrstp_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvfrstp_h(_1, _2, _3); } ++// CHECK-LABEL: @xvshuf4i_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2) { return __lasx_xvshuf4i_d(_1, _2, 1); } ++// CHECK-LABEL: @xvbsrl_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> 
@llvm.loongarch.lasx.xvbsrl.v(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvbsrl_v(v32i8 _1) { return __lasx_xvbsrl_v(_1, 1); } ++// CHECK-LABEL: @xvbsll_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvbsll_v(v32i8 _1) { return __lasx_xvbsll_v(_1, 1); } ++// CHECK-LABEL: @xvextrins_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvextrins_b(v32i8 _1, v32i8 _2) { return __lasx_xvextrins_b(_1, _2, 1); } ++// CHECK-LABEL: @xvextrins_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvextrins_h(v16i16 _1, v16i16 _2) { return __lasx_xvextrins_h(_1, _2, 1); } ++// CHECK-LABEL: @xvextrins_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvextrins_w(v8i32 _1, v8i32 _2) { return __lasx_xvextrins_w(_1, _2, 1); } ++// CHECK-LABEL: @xvextrins_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvextrins_d(v4i64 _1, v4i64 _2) { return __lasx_xvextrins_d(_1, _2, 1); } ++// CHECK-LABEL: @xvmskltz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmskltz_b(v32i8 _1) { return __lasx_xvmskltz_b(_1); } ++// CHECK-LABEL: @xvmskltz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmskltz_h(v16i16 _1) { return __lasx_xvmskltz_h(_1); } ++// CHECK-LABEL: @xvmskltz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmskltz_w(v8i32 _1) { return __lasx_xvmskltz_w(_1); } ++// CHECK-LABEL: @xvmskltz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmskltz_d(v4i64 _1) { return __lasx_xvmskltz_d(_1); } ++// CHECK-LABEL: @xvsigncov_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsigncov_b(v32i8 _1, v32i8 _2) { return __lasx_xvsigncov_b(_1, _2); } ++// CHECK-LABEL: @xvsigncov_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsigncov_h(v16i16 _1, v16i16 _2) { return __lasx_xvsigncov_h(_1, _2); } ++// CHECK-LABEL: @xvsigncov_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvsigncov.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsigncov_w(v8i32 _1, v8i32 _2) { return __lasx_xvsigncov_w(_1, _2); } ++// CHECK-LABEL: @xvsigncov_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsigncov_d(v4i64 _1, v4i64 _2) { return __lasx_xvsigncov_d(_1, _2); } ++// CHECK-LABEL: @xvfmadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfmadd_s(_1, _2, _3); } ++// CHECK-LABEL: @xvfmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfmadd_d(_1, _2, _3); } ++// CHECK-LABEL: @xvfmsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfmsub_s(_1, _2, _3); } ++// CHECK-LABEL: @xvfmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfmsub_d(_1, _2, _3); } ++// CHECK-LABEL: @xvfnmadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfnmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfnmadd_s(_1, _2, _3); } ++// CHECK-LABEL: @xvfnmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfnmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfnmadd_d(_1, _2, _3); } ++// CHECK-LABEL: @xvfnmsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfnmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfnmsub_s(_1, _2, _3); } ++// CHECK-LABEL: @xvfnmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfnmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfnmsub_d(_1, _2, _3); } ++// CHECK-LABEL: @xvftintrne_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 
xvftintrne_w_s(v8f32 _1) { return __lasx_xvftintrne_w_s(_1); } ++// CHECK-LABEL: @xvftintrne_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrne_l_d(v4f64 _1) { return __lasx_xvftintrne_l_d(_1); } ++// CHECK-LABEL: @xvftintrp_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftintrp_w_s(v8f32 _1) { return __lasx_xvftintrp_w_s(_1); } ++// CHECK-LABEL: @xvftintrp_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrp_l_d(v4f64 _1) { return __lasx_xvftintrp_l_d(_1); } ++// CHECK-LABEL: @xvftintrm_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftintrm_w_s(v8f32 _1) { return __lasx_xvftintrm_w_s(_1); } ++// CHECK-LABEL: @xvftintrm_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrm_l_d(v4f64 _1) { return __lasx_xvftintrm_l_d(_1); } ++// CHECK-LABEL: @xvftint_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftint_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftint_w_d(_1, _2); } ++// CHECK-LABEL: @xvffint_s_l( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvffint_s_l(v4i64 _1, v4i64 _2) { return __lasx_xvffint_s_l(_1, _2); } ++// CHECK-LABEL: @xvftintrz_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftintrz_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrz_w_d(_1, _2); } ++// CHECK-LABEL: @xvftintrp_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftintrp_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrp_w_d(_1, _2); } ++// CHECK-LABEL: @xvftintrm_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftintrm_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrm_w_d(_1, _2); } ++// CHECK-LABEL: @xvftintrne_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftintrne_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrne_w_d(_1, _2); } ++// CHECK-LABEL: @xvftinth_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 
x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftinth_l_s(v8f32 _1) { return __lasx_xvftinth_l_s(_1); } ++// CHECK-LABEL: @xvftintl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintl_l_s(v8f32 _1) { return __lasx_xvftintl_l_s(_1); } ++// CHECK-LABEL: @xvffinth_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvffinth_d_w(v8i32 _1) { return __lasx_xvffinth_d_w(_1); } ++// CHECK-LABEL: @xvffintl_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvffintl_d_w(v8i32 _1) { return __lasx_xvffintl_d_w(_1); } ++// CHECK-LABEL: @xvftintrzh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrzh_l_s(v8f32 _1) { return __lasx_xvftintrzh_l_s(_1); } ++// CHECK-LABEL: @xvftintrzl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrzl_l_s(v8f32 _1) { return __lasx_xvftintrzl_l_s(_1); } ++// CHECK-LABEL: @xvftintrph_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrph_l_s(v8f32 _1) { return __lasx_xvftintrph_l_s(_1); } ++// CHECK-LABEL: @xvftintrpl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrpl_l_s(v8f32 _1) { return __lasx_xvftintrpl_l_s(_1); } ++// CHECK-LABEL: @xvftintrmh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrmh_l_s(v8f32 _1) { return __lasx_xvftintrmh_l_s(_1); } ++// CHECK-LABEL: @xvftintrml_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrml_l_s(v8f32 _1) { return __lasx_xvftintrml_l_s(_1); } ++// CHECK-LABEL: @xvftintrneh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrneh_l_s(v8f32 _1) { return __lasx_xvftintrneh_l_s(_1); } ++// CHECK-LABEL: @xvftintrnel_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrnel_l_s(v8f32 _1) { return __lasx_xvftintrnel_l_s(_1); } ++// CHECK-LABEL: @xvfrintrne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> ++// CHECK-NEXT: ret <8 x 
i32> [[TMP1]] ++// ++v8i32 xvfrintrne_s(v8f32 _1) { return __lasx_xvfrintrne_s(_1); } ++// CHECK-LABEL: @xvfrintrne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> ++// CHECK-NEXT: ret <4 x i64> [[TMP1]] ++// ++v4i64 xvfrintrne_d(v4f64 _1) { return __lasx_xvfrintrne_d(_1); } ++// CHECK-LABEL: @xvfrintrz_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> ++// CHECK-NEXT: ret <8 x i32> [[TMP1]] ++// ++v8i32 xvfrintrz_s(v8f32 _1) { return __lasx_xvfrintrz_s(_1); } ++// CHECK-LABEL: @xvfrintrz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> ++// CHECK-NEXT: ret <4 x i64> [[TMP1]] ++// ++v4i64 xvfrintrz_d(v4f64 _1) { return __lasx_xvfrintrz_d(_1); } ++// CHECK-LABEL: @xvfrintrp_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> ++// CHECK-NEXT: ret <8 x i32> [[TMP1]] ++// ++v8i32 xvfrintrp_s(v8f32 _1) { return __lasx_xvfrintrp_s(_1); } ++// CHECK-LABEL: @xvfrintrp_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> ++// CHECK-NEXT: ret <4 x i64> [[TMP1]] ++// ++v4i64 xvfrintrp_d(v4f64 _1) { return __lasx_xvfrintrp_d(_1); } ++// CHECK-LABEL: @xvfrintrm_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> ++// CHECK-NEXT: ret <8 x i32> [[TMP1]] ++// ++v8i32 xvfrintrm_s(v8f32 _1) { return __lasx_xvfrintrm_s(_1); } ++// CHECK-LABEL: @xvfrintrm_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> ++// CHECK-NEXT: ret <4 x i64> [[TMP1]] ++// ++v4i64 xvfrintrm_d(v4f64 _1) { return __lasx_xvfrintrm_d(_1); } ++// CHECK-LABEL: @xvld( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvld(void * _1) { return __lasx_xvld(_1, 1); } ++// CHECK-LABEL: @xvst( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret void ++// ++void xvst(v32i8 _1, void * _2) { return __lasx_xvst(_1, _2, 1); } ++// CHECK-LABEL: @xvstelm_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1, i32 1) ++// CHECK-NEXT: ret void ++// ++void xvstelm_b(v32i8 _1, void * _2) { return __lasx_xvstelm_b(_1, _2, 1, 1); } ++// CHECK-LABEL: @xvstelm_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1:%.*]], ptr [[_2:%.*]], i32 2, i32 1) ++// CHECK-NEXT: ret void ++// ++void 
xvstelm_h(v16i16 _1, void * _2) { return __lasx_xvstelm_h(_1, _2, 2, 1); } ++// CHECK-LABEL: @xvstelm_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1:%.*]], ptr [[_2:%.*]], i32 4, i32 1) ++// CHECK-NEXT: ret void ++// ++void xvstelm_w(v8i32 _1, void * _2) { return __lasx_xvstelm_w(_1, _2, 4, 1); } ++// CHECK-LABEL: @xvstelm_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1:%.*]], ptr [[_2:%.*]], i32 8, i32 1) ++// CHECK-NEXT: ret void ++// ++void xvstelm_d(v4i64 _1, void * _2) { return __lasx_xvstelm_d(_1, _2, 8, 1); } ++// CHECK-LABEL: @xvinsve0_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvinsve0_w(v8i32 _1, v8i32 _2) { return __lasx_xvinsve0_w(_1, _2, 1); } ++// CHECK-LABEL: @xvinsve0_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvinsve0_d(v4i64 _1, v4i64 _2) { return __lasx_xvinsve0_d(_1, _2, 1); } ++// CHECK-LABEL: @xvpickve_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvpickve_w(v8i32 _1) { return __lasx_xvpickve_w(_1, 1); } ++// CHECK-LABEL: @xvpickve_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvpickve_d(v4i64 _1) { return __lasx_xvpickve_d(_1, 1); } ++// CHECK-LABEL: @xvssrlrn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssrlrn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrlrn_b_h(_1, _2); } ++// CHECK-LABEL: @xvssrlrn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssrlrn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrlrn_h_w(_1, _2); } ++// CHECK-LABEL: @xvssrlrn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssrlrn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrlrn_w_d(_1, _2); } ++// CHECK-LABEL: @xvssrln_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssrln_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrln_b_h(_1, _2); } ++// CHECK-LABEL: @xvssrln_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssrln_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrln_h_w(_1, _2); } ++// CHECK-LABEL: @xvssrln_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> 
[[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssrln_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrln_w_d(_1, _2); } ++// CHECK-LABEL: @xvorn_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvorn_v(v32i8 _1, v32i8 _2) { return __lasx_xvorn_v(_1, _2); } ++// CHECK-LABEL: @xvldi( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvldi() { return __lasx_xvldi(1); } ++// CHECK-LABEL: @xvldx( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1:%.*]], i64 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvldx(void * _1) { return __lasx_xvldx(_1, 1); } ++// CHECK-LABEL: @xvstx( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i64 1) ++// CHECK-NEXT: ret void ++// ++void xvstx(v32i8 _1, void * _2) { return __lasx_xvstx(_1, _2, 1); } ++// CHECK-LABEL: @xvextl_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvextl_qu_du(v4u64 _1) { return __lasx_xvextl_qu_du(_1); } ++// CHECK-LABEL: @xvinsgr2vr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> [[_1:%.*]], i32 1, i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvinsgr2vr_w(v8i32 _1) { return __lasx_xvinsgr2vr_w(_1, 1, 1); } ++// CHECK-LABEL: @xvinsgr2vr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> [[_1:%.*]], i64 1, i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvinsgr2vr_d(v4i64 _1) { return __lasx_xvinsgr2vr_d(_1, 1, 1); } ++// CHECK-LABEL: @xvreplve0_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvreplve0_b(v32i8 _1) { return __lasx_xvreplve0_b(_1); } ++// CHECK-LABEL: @xvreplve0_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvreplve0_h(v16i16 _1) { return __lasx_xvreplve0_h(_1); } ++// CHECK-LABEL: @xvreplve0_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvreplve0_w(v8i32 _1) { return __lasx_xvreplve0_w(_1); } ++// CHECK-LABEL: @xvreplve0_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvreplve0_d(v4i64 _1) { return __lasx_xvreplve0_d(_1); } ++// CHECK-LABEL: @xvreplve0_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvreplve0_q(v32i8 _1) { return __lasx_xvreplve0_q(_1); } ++// CHECK-LABEL: @vext2xv_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> 
@llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 vext2xv_h_b(v32i8 _1) { return __lasx_vext2xv_h_b(_1); } ++// CHECK-LABEL: @vext2xv_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 vext2xv_w_h(v16i16 _1) { return __lasx_vext2xv_w_h(_1); } ++// CHECK-LABEL: @vext2xv_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 vext2xv_d_w(v8i32 _1) { return __lasx_vext2xv_d_w(_1); } ++// CHECK-LABEL: @vext2xv_w_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 vext2xv_w_b(v32i8 _1) { return __lasx_vext2xv_w_b(_1); } ++// CHECK-LABEL: @vext2xv_d_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 vext2xv_d_h(v16i16 _1) { return __lasx_vext2xv_d_h(_1); } ++// CHECK-LABEL: @vext2xv_d_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 vext2xv_d_b(v32i8 _1) { return __lasx_vext2xv_d_b(_1); } ++// CHECK-LABEL: @vext2xv_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 vext2xv_hu_bu(v32i8 _1) { return __lasx_vext2xv_hu_bu(_1); } ++// CHECK-LABEL: @vext2xv_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 vext2xv_wu_hu(v16i16 _1) { return __lasx_vext2xv_wu_hu(_1); } ++// CHECK-LABEL: @vext2xv_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 vext2xv_du_wu(v8i32 _1) { return __lasx_vext2xv_du_wu(_1); } ++// CHECK-LABEL: @vext2xv_wu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 vext2xv_wu_bu(v32i8 _1) { return __lasx_vext2xv_wu_bu(_1); } ++// CHECK-LABEL: @vext2xv_du_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 vext2xv_du_hu(v16i16 _1) { return __lasx_vext2xv_du_hu(_1); } ++// CHECK-LABEL: @vext2xv_du_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 vext2xv_du_bu(v32i8 _1) { return __lasx_vext2xv_du_bu(_1); } ++// CHECK-LABEL: @xvpermi_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvpermi_q(v32i8 _1, v32i8 _2) { return __lasx_xvpermi_q(_1, _2, 1); } ++// 
CHECK-LABEL: @xvpermi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvpermi_d(v4i64 _1) { return __lasx_xvpermi_d(_1, 1); } ++// CHECK-LABEL: @xvperm_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvperm_w(v8i32 _1, v8i32 _2) { return __lasx_xvperm_w(_1, _2); } ++// CHECK-LABEL: @xvldrepl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvldrepl_b(void * _1) { return __lasx_xvldrepl_b(_1, 1); } ++// CHECK-LABEL: @xvldrepl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr [[_1:%.*]], i32 2) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvldrepl_h(void * _1) { return __lasx_xvldrepl_h(_1, 2); } ++// CHECK-LABEL: @xvldrepl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr [[_1:%.*]], i32 4) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvldrepl_w(void * _1) { return __lasx_xvldrepl_w(_1, 4); } ++// CHECK-LABEL: @xvldrepl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr [[_1:%.*]], i32 8) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvldrepl_d(void * _1) { return __lasx_xvldrepl_d(_1, 8); } ++// CHECK-LABEL: @xvpickve2gr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xvpickve2gr_w(v8i32 _1) { return __lasx_xvpickve2gr_w(_1, 1); } ++// CHECK-LABEL: @xvpickve2gr_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++unsigned int xvpickve2gr_wu(v8i32 _1) { return __lasx_xvpickve2gr_wu(_1, 1); } ++// CHECK-LABEL: @xvpickve2gr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i64 [[TMP0]] ++// ++long xvpickve2gr_d(v4i64 _1) { return __lasx_xvpickve2gr_d(_1, 1); } ++// CHECK-LABEL: @xvpickve2gr_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i64 [[TMP0]] ++// ++unsigned long int xvpickve2gr_du(v4i64 _1) { return __lasx_xvpickve2gr_du(_1, 1); } ++// CHECK-LABEL: @xvaddwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvaddwev_q_d(_1, _2); } ++// CHECK-LABEL: @xvaddwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvaddwev_d_w(_1, _2); } ++// CHECK-LABEL: @xvaddwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvaddwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvaddwev_w_h(_1, _2); } ++// CHECK-LABEL: @xvaddwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvaddwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvaddwev_h_b(_1, _2); } ++// CHECK-LABEL: @xvaddwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvaddwev_q_du(_1, _2); } ++// CHECK-LABEL: @xvaddwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvaddwev_d_wu(_1, _2); } ++// CHECK-LABEL: @xvaddwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvaddwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvaddwev_w_hu(_1, _2); } ++// CHECK-LABEL: @xvaddwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvaddwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvaddwev_h_bu(_1, _2); } ++// CHECK-LABEL: @xvsubwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvsubwev_q_d(_1, _2); } ++// CHECK-LABEL: @xvsubwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvsubwev_d_w(_1, _2); } ++// CHECK-LABEL: @xvsubwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsubwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvsubwev_w_h(_1, _2); } ++// CHECK-LABEL: @xvsubwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsubwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvsubwev_h_b(_1, _2); } ++// CHECK-LABEL: @xvsubwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvsubwev_q_du(_1, _2); } ++// CHECK-LABEL: @xvsubwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> 
[[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsubwev_d_wu(_1, _2); } ++// CHECK-LABEL: @xvsubwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsubwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsubwev_w_hu(_1, _2); } ++// CHECK-LABEL: @xvsubwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsubwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsubwev_h_bu(_1, _2); } ++// CHECK-LABEL: @xvmulwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvmulwev_q_d(_1, _2); } ++// CHECK-LABEL: @xvmulwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvmulwev_d_w(_1, _2); } ++// CHECK-LABEL: @xvmulwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmulwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvmulwev_w_h(_1, _2); } ++// CHECK-LABEL: @xvmulwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmulwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvmulwev_h_b(_1, _2); } ++// CHECK-LABEL: @xvmulwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvmulwev_q_du(_1, _2); } ++// CHECK-LABEL: @xvmulwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmulwev_d_wu(_1, _2); } ++// CHECK-LABEL: @xvmulwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmulwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmulwev_w_hu(_1, _2); } ++// CHECK-LABEL: @xvmulwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmulwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmulwev_h_bu(_1, _2); } ++// CHECK-LABEL: @xvaddwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 
xvaddwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvaddwod_q_d(_1, _2); } ++// CHECK-LABEL: @xvaddwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvaddwod_d_w(_1, _2); } ++// CHECK-LABEL: @xvaddwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvaddwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvaddwod_w_h(_1, _2); } ++// CHECK-LABEL: @xvaddwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvaddwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvaddwod_h_b(_1, _2); } ++// CHECK-LABEL: @xvaddwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvaddwod_q_du(_1, _2); } ++// CHECK-LABEL: @xvaddwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvaddwod_d_wu(_1, _2); } ++// CHECK-LABEL: @xvaddwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvaddwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvaddwod_w_hu(_1, _2); } ++// CHECK-LABEL: @xvaddwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvaddwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvaddwod_h_bu(_1, _2); } ++// CHECK-LABEL: @xvsubwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvsubwod_q_d(_1, _2); } ++// CHECK-LABEL: @xvsubwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvsubwod_d_w(_1, _2); } ++// CHECK-LABEL: @xvsubwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsubwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvsubwod_w_h(_1, _2); } ++// CHECK-LABEL: @xvsubwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsubwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvsubwod_h_b(_1, _2); } 
++// CHECK-LABEL: @xvsubwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvsubwod_q_du(_1, _2); } ++// CHECK-LABEL: @xvsubwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsubwod_d_wu(_1, _2); } ++// CHECK-LABEL: @xvsubwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsubwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsubwod_w_hu(_1, _2); } ++// CHECK-LABEL: @xvsubwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsubwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsubwod_h_bu(_1, _2); } ++// CHECK-LABEL: @xvmulwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvmulwod_q_d(_1, _2); } ++// CHECK-LABEL: @xvmulwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvmulwod_d_w(_1, _2); } ++// CHECK-LABEL: @xvmulwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmulwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvmulwod_w_h(_1, _2); } ++// CHECK-LABEL: @xvmulwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmulwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvmulwod_h_b(_1, _2); } ++// CHECK-LABEL: @xvmulwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvmulwod_q_du(_1, _2); } ++// CHECK-LABEL: @xvmulwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmulwod_d_wu(_1, _2); } ++// CHECK-LABEL: @xvmulwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmulwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmulwod_w_hu(_1, _2); } ++// CHECK-LABEL: @xvmulwod_h_bu( ++// CHECK-NEXT: entry: ++// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmulwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmulwod_h_bu(_1, _2); } ++// CHECK-LABEL: @xvaddwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwev_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvaddwev_d_wu_w(_1, _2); } ++// CHECK-LABEL: @xvaddwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvaddwev_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvaddwev_w_hu_h(_1, _2); } ++// CHECK-LABEL: @xvaddwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvaddwev_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvaddwev_h_bu_b(_1, _2); } ++// CHECK-LABEL: @xvmulwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwev_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvmulwev_d_wu_w(_1, _2); } ++// CHECK-LABEL: @xvmulwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmulwev_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvmulwev_w_hu_h(_1, _2); } ++// CHECK-LABEL: @xvmulwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmulwev_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvmulwev_h_bu_b(_1, _2); } ++// CHECK-LABEL: @xvaddwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwod_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvaddwod_d_wu_w(_1, _2); } ++// CHECK-LABEL: @xvaddwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvaddwod_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvaddwod_w_hu_h(_1, _2); } ++// CHECK-LABEL: @xvaddwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvaddwod_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvaddwod_h_bu_b(_1, _2); } ++// CHECK-LABEL: @xvmulwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwod_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvmulwod_d_wu_w(_1, _2); } ++// CHECK-LABEL: 
@xvmulwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmulwod_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvmulwod_w_hu_h(_1, _2); } ++// CHECK-LABEL: @xvmulwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmulwod_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvmulwod_h_bu_b(_1, _2); } ++// CHECK-LABEL: @xvhaddw_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvhaddw_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvhaddw_q_d(_1, _2); } ++// CHECK-LABEL: @xvhaddw_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvhaddw_qu_du(v4u64 _1, v4u64 _2) { return __lasx_xvhaddw_qu_du(_1, _2); } ++// CHECK-LABEL: @xvhsubw_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvhsubw_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvhsubw_q_d(_1, _2); } ++// CHECK-LABEL: @xvhsubw_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvhsubw_qu_du(v4u64 _1, v4u64 _2) { return __lasx_xvhsubw_qu_du(_1, _2); } ++// CHECK-LABEL: @xvmaddwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaddwev_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmaddwev_q_d(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaddwev_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmaddwev_d_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmaddwev_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmaddwev_w_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmaddwev_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmaddwev_h_b(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x 
i64> [[TMP0]] ++// ++v4u64 xvmaddwev_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __lasx_xvmaddwev_q_du(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmaddwev_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __lasx_xvmaddwev_d_wu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvmaddwev_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __lasx_xvmaddwev_w_hu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvmaddwev_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __lasx_xvmaddwev_h_bu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaddwod_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmaddwod_q_d(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaddwod_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmaddwod_d_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmaddwod_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmaddwod_w_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmaddwod_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmaddwod_h_b(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmaddwod_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __lasx_xvmaddwod_q_du(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmaddwod_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __lasx_xvmaddwod_d_wu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// 
CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvmaddwod_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __lasx_xvmaddwod_w_hu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvmaddwod_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __lasx_xvmaddwod_h_bu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaddwev_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __lasx_xvmaddwev_q_du_d(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaddwev_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __lasx_xvmaddwev_d_wu_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmaddwev_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __lasx_xvmaddwev_w_hu_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmaddwev_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __lasx_xvmaddwev_h_bu_b(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaddwod_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __lasx_xvmaddwod_q_du_d(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaddwod_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __lasx_xvmaddwod_d_wu_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmaddwod_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __lasx_xvmaddwod_w_hu_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmaddwod_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __lasx_xvmaddwod_h_bu_b(_1, _2, _3); } ++// CHECK-LABEL: @xvrotr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> 
@llvm.loongarch.lasx.xvrotr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvrotr_b(v32i8 _1, v32i8 _2) { return __lasx_xvrotr_b(_1, _2); } ++// CHECK-LABEL: @xvrotr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvrotr_h(v16i16 _1, v16i16 _2) { return __lasx_xvrotr_h(_1, _2); } ++// CHECK-LABEL: @xvrotr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvrotr_w(v8i32 _1, v8i32 _2) { return __lasx_xvrotr_w(_1, _2); } ++// CHECK-LABEL: @xvrotr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvrotr_d(v4i64 _1, v4i64 _2) { return __lasx_xvrotr_d(_1, _2); } ++// CHECK-LABEL: @xvadd_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvadd_q(v4i64 _1, v4i64 _2) { return __lasx_xvadd_q(_1, _2); } ++// CHECK-LABEL: @xvsub_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsub_q(v4i64 _1, v4i64 _2) { return __lasx_xvsub_q(_1, _2); } ++// CHECK-LABEL: @xvaddwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwev_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvaddwev_q_du_d(_1, _2); } ++// CHECK-LABEL: @xvaddwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwod_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvaddwod_q_du_d(_1, _2); } ++// CHECK-LABEL: @xvmulwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwev_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvmulwev_q_du_d(_1, _2); } ++// CHECK-LABEL: @xvmulwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwod_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvmulwod_q_du_d(_1, _2); } ++// CHECK-LABEL: @xvmskgez_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmskgez_b(v32i8 _1) { return __lasx_xvmskgez_b(_1); } ++// CHECK-LABEL: @xvmsknz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmsknz_b(v32i8 _1) { return __lasx_xvmsknz_b(_1); } ++// CHECK-LABEL: @xvexth_h_b( ++// 
CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvexth_h_b(v32i8 _1) { return __lasx_xvexth_h_b(_1); } ++// CHECK-LABEL: @xvexth_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvexth_w_h(v16i16 _1) { return __lasx_xvexth_w_h(_1); } ++// CHECK-LABEL: @xvexth_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvexth_d_w(v8i32 _1) { return __lasx_xvexth_d_w(_1); } ++// CHECK-LABEL: @xvexth_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvexth_q_d(v4i64 _1) { return __lasx_xvexth_q_d(_1); } ++// CHECK-LABEL: @xvexth_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvexth_hu_bu(v32u8 _1) { return __lasx_xvexth_hu_bu(_1); } ++// CHECK-LABEL: @xvexth_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvexth_wu_hu(v16u16 _1) { return __lasx_xvexth_wu_hu(_1); } ++// CHECK-LABEL: @xvexth_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvexth_du_wu(v8u32 _1) { return __lasx_xvexth_du_wu(_1); } ++// CHECK-LABEL: @xvexth_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvexth_qu_du(v4u64 _1) { return __lasx_xvexth_qu_du(_1); } ++// CHECK-LABEL: @xvrotri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvrotri_b(v32i8 _1) { return __lasx_xvrotri_b(_1, 1); } ++// CHECK-LABEL: @xvrotri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvrotri_h(v16i16 _1) { return __lasx_xvrotri_h(_1, 1); } ++// CHECK-LABEL: @xvrotri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvrotri_w(v8i32 _1) { return __lasx_xvrotri_w(_1, 1); } ++// CHECK-LABEL: @xvrotri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvrotri_d(v4i64 _1) { return __lasx_xvrotri_d(_1, 1); } ++// CHECK-LABEL: @xvextl_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvextl_q_d(v4i64 _1) { return __lasx_xvextl_q_d(_1); } ++// CHECK-LABEL: 
@xvsrlni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrlni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrlni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrlni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrlni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlrni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrlrni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlrni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrlrni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlrni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrlrni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlrni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrlrni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrlni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrlni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrlni_w_d(_1, _2, 1); } ++// 
CHECK-LABEL: @xvssrlni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrlni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrlni_bu_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrlni_hu_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrlni_wu_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrlni_du_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrlrni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrlrni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrlrni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrlrni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrlrni_bu_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 
xvssrlrni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrlrni_hu_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrlrni_wu_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrlrni_du_q(_1, _2, 1); } ++// CHECK-LABEL: @xvsrani_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrani_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvsrani_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrani_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvsrani_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrani_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvsrani_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrani_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvsrarni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrarni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvsrarni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrarni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvsrarni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrarni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvsrarni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrarni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> 
[[TMP0]] ++// ++v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrani_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrani_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrani_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrani_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrani_bu_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrani_hu_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrani_wu_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrani_du_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrarni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrarni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrarni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> 
[[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrarni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrarni_bu_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrarni_hu_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrarni_wu_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrarni_du_q(_1, _2, 1); } ++// CHECK-LABEL: @xbnz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbnz_b(v32u8 _1) { return __lasx_xbnz_b(_1); } ++// CHECK-LABEL: @xbnz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbnz_d(v4u64 _1) { return __lasx_xbnz_d(_1); } ++// CHECK-LABEL: @xbnz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbnz_h(v16u16 _1) { return __lasx_xbnz_h(_1); } ++// CHECK-LABEL: @xbnz_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbnz_v(v32u8 _1) { return __lasx_xbnz_v(_1); } ++// CHECK-LABEL: @xbnz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbnz_w(v8u32 _1) { return __lasx_xbnz_w(_1); } ++// CHECK-LABEL: @xbz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbz_b(v32u8 _1) { return __lasx_xbz_b(_1); } ++// CHECK-LABEL: @xbz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbz_d(v4u64 _1) { return __lasx_xbz_d(_1); } ++// CHECK-LABEL: @xbz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbz_h(v16u16 _1) { return __lasx_xbz_h(_1); } ++// CHECK-LABEL: @xbz_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 
[[TMP0]] ++// ++int xbz_v(v32u8 _1) { return __lasx_xbz_v(_1); } ++// CHECK-LABEL: @xbz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbz_w(v8u32 _1) { return __lasx_xbz_w(_1); } ++// CHECK-LABEL: @xvfcmp_caf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_caf_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_caf_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_caf_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_caf_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_caf_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_ceq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_ceq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_ceq_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_ceq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_ceq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_ceq_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_cle_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cle_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cle_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_cle_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cle_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_clt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_clt_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_clt_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_clt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_clt_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_clt_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_cne_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cne_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_cne_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cne_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cor_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_cor_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cor_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cor_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_cor_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cor_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cueq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_cueq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cueq_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cueq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_cueq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cueq_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cule_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_cule_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cule_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cule_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_cule_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cule_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cult_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_cult_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cult_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cult_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_cult_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cult_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cun_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_cun_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cun_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cune_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_cune_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cune_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cune_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_cune_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cune_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cun_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail 
call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_cun_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cun_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_saf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_saf_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_saf_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_saf_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_saf_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_saf_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_seq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_seq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_seq_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_seq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_seq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_seq_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_sle_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sle_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sle_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_sle_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sle_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_slt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_slt_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_slt_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_slt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_slt_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_slt_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_sne_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sne_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_sne_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sne_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sor_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x 
double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_sor_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sor_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sor_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_sor_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sor_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sueq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_sueq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sueq_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sueq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_sueq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sueq_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sule_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_sule_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sule_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sule_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_sule_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sule_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sult_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_sult_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sult_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sult_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_sult_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sult_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sun_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_sun_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sun_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sune_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_sune_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sune_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sune_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_sune_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sune_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sun_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> [[_1:%.*]], <8 x 
float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_sun_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sun_s(_1, _2); } ++// CHECK-LABEL: @xvpickve_d_f( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvpickve_d_f(v4f64 _1) { return __lasx_xvpickve_d_f(_1, 1); } ++// CHECK-LABEL: @xvpickve_w_f( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvpickve_w_f(v8f32 _1) { return __lasx_xvpickve_w_f(_1, 1); } ++// CHECK-LABEL: @xvrepli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvrepli_b() { return __lasx_xvrepli_b(1); } ++// CHECK-LABEL: @xvrepli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvrepli_d() { return __lasx_xvrepli_d(1); } ++// CHECK-LABEL: @xvrepli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvrepli_h() { return __lasx_xvrepli_h(1); } ++// CHECK-LABEL: @xvrepli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvrepli_w() { return __lasx_xvrepli_w(1); } +diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-error.c b/clang/test/CodeGen/LoongArch/lasx/builtin-error.c +new file mode 100644 +index 000000000000..724484465769 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/lasx/builtin-error.c +@@ -0,0 +1,1392 @@ ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -verify %s ++ ++typedef signed char v32i8 __attribute__((vector_size(32), aligned(32))); ++typedef signed char v32i8_b __attribute__((vector_size(32), aligned(1))); ++typedef unsigned char v32u8 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned char v32u8_b __attribute__((vector_size(32), aligned(1))); ++typedef short v16i16 __attribute__((vector_size(32), aligned(32))); ++typedef short v16i16_h __attribute__((vector_size(32), aligned(2))); ++typedef unsigned short v16u16 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned short v16u16_h __attribute__((vector_size(32), aligned(2))); ++typedef int v8i32 __attribute__((vector_size(32), aligned(32))); ++typedef int v8i32_w __attribute__((vector_size(32), aligned(4))); ++typedef unsigned int v8u32 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned int v8u32_w __attribute__((vector_size(32), aligned(4))); ++typedef long long v4i64 __attribute__((vector_size(32), aligned(32))); ++typedef long long v4i64_d __attribute__((vector_size(32), aligned(8))); ++typedef unsigned long long v4u64 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned long long v4u64_d __attribute__((vector_size(32), aligned(8))); ++typedef float v8f32 __attribute__((vector_size(32), aligned(32))); ++typedef float v8f32_w __attribute__((vector_size(32), aligned(4))); ++typedef double v4f64 __attribute__((vector_size(32), aligned(32))); ++typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); ++ ++v32i8 xvslli_b(v32i8 
_1, int var) { ++ v32i8 res = __builtin_lasx_xvslli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvslli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvslli_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvslli_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvslli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvslli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvslli_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvslli_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvslli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslli_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvslli_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvslli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvslli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvslli_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrai_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvsrai_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvsrai_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvsrai_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrai_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvsrai_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsrai_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsrai_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrai_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvsrai_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsrai_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsrai_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrai_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvsrai_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsrai_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsrai_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrari_b(v32i8 _1, int var) { ++ v32i8 res = 
__builtin_lasx_xvsrari_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvsrari_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvsrari_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrari_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvsrari_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsrari_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsrari_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrari_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvsrari_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsrari_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsrari_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrari_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvsrari_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsrari_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsrari_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrli_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvsrli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvsrli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvsrli_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrli_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvsrli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsrli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsrli_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrli_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvsrli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsrli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsrli_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrli_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvsrli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsrli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsrli_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrlri_b(v32i8 _1, int var) { ++ v32i8 res = 
__builtin_lasx_xvsrlri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvsrlri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvsrlri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrlri_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvsrlri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsrlri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsrlri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrlri_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvsrlri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsrlri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsrlri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrlri_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvsrlri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsrlri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsrlri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvbitclri_b(v32u8 _1, int var) { ++ v32u8 res = __builtin_lasx_xvbitclri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvbitclri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvbitclri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_b' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvbitclri_h(v16u16 _1, int var) { ++ v16u16 res = __builtin_lasx_xvbitclri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvbitclri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvbitclri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_h' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvbitclri_w(v8u32 _1, int var) { ++ v8u32 res = __builtin_lasx_xvbitclri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvbitclri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvbitclri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_w' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvbitclri_d(v4u64 _1, int var) { ++ v4u64 res = __builtin_lasx_xvbitclri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvbitclri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvbitclri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 
xvbitseti_b(v32u8 _1, int var) { ++ v32u8 res = __builtin_lasx_xvbitseti_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvbitseti_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvbitseti_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_b' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvbitseti_h(v16u16 _1, int var) { ++ v16u16 res = __builtin_lasx_xvbitseti_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvbitseti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvbitseti_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_h' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvbitseti_w(v8u32 _1, int var) { ++ v8u32 res = __builtin_lasx_xvbitseti_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvbitseti_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvbitseti_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_w' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvbitseti_d(v4u64 _1, int var) { ++ v4u64 res = __builtin_lasx_xvbitseti_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvbitseti_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvbitseti_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvbitrevi_b(v32u8 _1, int var) { ++ v32u8 res = __builtin_lasx_xvbitrevi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvbitrevi_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvbitrevi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_b' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvbitrevi_h(v16u16 _1, int var) { ++ v16u16 res = __builtin_lasx_xvbitrevi_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvbitrevi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvbitrevi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_h' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvbitrevi_w(v8u32 _1, int var) { ++ v8u32 res = __builtin_lasx_xvbitrevi_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvbitrevi_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvbitrevi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_w' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvbitrevi_d(v4u64 _1, int var) { ++ v4u64 res = __builtin_lasx_xvbitrevi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvbitrevi_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvbitrevi_d(_1, var); // expected-error {{argument to 
'__builtin_lasx_xvbitrevi_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvaddi_bu(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvaddi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvaddi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvaddi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_bu' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvaddi_hu(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvaddi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvaddi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvaddi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_hu' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvaddi_wu(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvaddi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvaddi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvaddi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_wu' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvaddi_du(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvaddi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvaddi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvaddi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsubi_bu(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvsubi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsubi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsubi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_bu' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsubi_hu(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvsubi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsubi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsubi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_hu' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsubi_wu(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvsubi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsubi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsubi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_wu' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsubi_du(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvsubi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsubi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsubi_du(_1, var); // expected-error 
{{argument to '__builtin_lasx_xvsubi_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvmaxi_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvmaxi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvmaxi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvmaxi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvmaxi_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvmaxi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvmaxi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvmaxi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvmaxi_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvmaxi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvmaxi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvmaxi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvmaxi_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvmaxi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvmaxi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvmaxi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvmaxi_bu(v32u8 _1, int var) { ++ v32u8 res = __builtin_lasx_xvmaxi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvmaxi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvmaxi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_bu' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvmaxi_hu(v16u16 _1, int var) { ++ v16u16 res = __builtin_lasx_xvmaxi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvmaxi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvmaxi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_hu' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvmaxi_wu(v8u32 _1, int var) { ++ v8u32 res = __builtin_lasx_xvmaxi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvmaxi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvmaxi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_wu' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvmaxi_du(v4u64 _1, int var) { ++ v4u64 res = __builtin_lasx_xvmaxi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvmaxi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvmaxi_du(_1, var); // expected-error {{argument to 
'__builtin_lasx_xvmaxi_du' must be a constant integer}}
++ return res;
++}
++
++v32i8 xvmini_b(v32i8 _1, int var) {
++ v32i8 res = __builtin_lasx_xvmini_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
++ res |= __builtin_lasx_xvmini_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
++ res |= __builtin_lasx_xvmini_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_b' must be a constant integer}}
++ return res;
++}
++
++v16i16 xvmini_h(v16i16 _1, int var) {
++ v16i16 res = __builtin_lasx_xvmini_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
++ res |= __builtin_lasx_xvmini_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
++ res |= __builtin_lasx_xvmini_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_h' must be a constant integer}}
++ return res;
++}
++
++v8i32 xvmini_w(v8i32 _1, int var) {
++ v8i32 res = __builtin_lasx_xvmini_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
++ res |= __builtin_lasx_xvmini_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
++ res |= __builtin_lasx_xvmini_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_w' must be a constant integer}}
++ return res;
++}
++
++v4i64 xvmini_d(v4i64 _1, int var) {
++ v4i64 res = __builtin_lasx_xvmini_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
++ res |= __builtin_lasx_xvmini_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
++ res |= __builtin_lasx_xvmini_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_d' must be a constant integer}}
++ return res;
++}
++
++v32u8 xvmini_bu(v32u8 _1, int var) {
++ v32u8 res = __builtin_lasx_xvmini_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
++ res |= __builtin_lasx_xvmini_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
++ res |= __builtin_lasx_xvmini_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_bu' must be a constant integer}}
++ return res;
++}
++
++v16u16 xvmini_hu(v16u16 _1, int var) {
++ v16u16 res = __builtin_lasx_xvmini_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
++ res |= __builtin_lasx_xvmini_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
++ res |= __builtin_lasx_xvmini_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_hu' must be a constant integer}}
++ return res;
++}
++
++v8u32 xvmini_wu(v8u32 _1, int var) {
++ v8u32 res = __builtin_lasx_xvmini_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
++ res |= __builtin_lasx_xvmini_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
++ res |= __builtin_lasx_xvmini_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_wu' must be a constant integer}}
++ return res;
++}
++
++v4u64 xvmini_du(v4u64 _1, int var) {
++ v4u64 res = __builtin_lasx_xvmini_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
++ res |= __builtin_lasx_xvmini_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
++ res |= __builtin_lasx_xvmini_du(_1, var); // expected-error {{argument to
'__builtin_lasx_xvmini_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvseqi_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvseqi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvseqi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvseqi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvseqi_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvseqi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvseqi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvseqi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvseqi_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvseqi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvseqi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvseqi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvseqi_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvseqi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvseqi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvseqi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvslti_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvslti_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvslti_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvslti_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvslti_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvslti_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvslti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvslti_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvslti_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvslti_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvslti_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvslti_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvslti_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvslti_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvslti_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvslti_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_d' must be a 
constant integer}} ++ return res; ++} ++ ++v32i8 xvslti_bu(v32u8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvslti_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslti_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslti_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_bu' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvslti_hu(v16u16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvslti_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslti_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslti_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_hu' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvslti_wu(v8u32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvslti_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslti_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslti_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_wu' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvslti_du(v4u64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvslti_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslti_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslti_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvslei_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvslei_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvslei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvslei_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvslei_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvslei_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvslei_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvslei_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvslei_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvslei_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvslei_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvslei_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvslei_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvslei_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvslei_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvslei_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_d' must be a constant 
integer}} ++ return res; ++} ++ ++v32i8 xvslei_bu(v32u8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvslei_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslei_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslei_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_bu' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvslei_hu(v16u16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvslei_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslei_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslei_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_hu' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvslei_wu(v8u32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvslei_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslei_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslei_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_wu' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvslei_du(v4u64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvslei_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslei_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslei_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsat_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvsat_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvsat_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvsat_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsat_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvsat_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsat_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsat_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsat_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvsat_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsat_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsat_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsat_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvsat_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsat_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsat_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_d' must be a constant integer}} ++ return res; ++} ++ 
++v32u8 xvsat_bu(v32u8 _1, int var) { ++ v32u8 res = __builtin_lasx_xvsat_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvsat_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvsat_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_bu' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvsat_hu(v16u16 _1, int var) { ++ v16u16 res = __builtin_lasx_xvsat_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsat_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsat_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_hu' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvsat_wu(v8u32 _1, int var) { ++ v8u32 res = __builtin_lasx_xvsat_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsat_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsat_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_wu' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvsat_du(v4u64 _1, int var) { ++ v4u64 res = __builtin_lasx_xvsat_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsat_du(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsat_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvrepl128vei_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvrepl128vei_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvrepl128vei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvrepl128vei_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvrepl128vei_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvrepl128vei_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvrepl128vei_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvrepl128vei_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvrepl128vei_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvrepl128vei_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __builtin_lasx_xvrepl128vei_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __builtin_lasx_xvrepl128vei_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvrepl128vei_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvrepl128vei_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} ++ res |= __builtin_lasx_xvrepl128vei_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ res |= __builtin_lasx_xvrepl128vei_d(_1, var); // expected-error {{argument to 
'__builtin_lasx_xvrepl128vei_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvandi_b(v32u8 _1, int var) { ++ v32u8 res = __builtin_lasx_xvandi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvandi_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvandi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvandi_b' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvori_b(v32u8 _1, int var) { ++ v32u8 res = __builtin_lasx_xvori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvori_b' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvnori_b(v32u8 _1, int var) { ++ v32u8 res = __builtin_lasx_xvnori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvnori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvnori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvnori_b' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvxori_b(v32u8 _1, int var) { ++ v32u8 res = __builtin_lasx_xvxori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvxori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvxori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvxori_b' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvbitseli_b(v32u8 _1, v32u8 _2, int var) { ++ v32u8 res = __builtin_lasx_xvbitseli_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvbitseli_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvbitseli_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvbitseli_b' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvshuf4i_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvshuf4i_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvshuf4i_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvshuf4i_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvshuf4i_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvshuf4i_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvshuf4i_h(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvshuf4i_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvshuf4i_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvshuf4i_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvshuf4i_w(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= 
__builtin_lasx_xvshuf4i_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __builtin_lasx_xvshuf4i_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvshuf4i_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvshuf4i_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_d' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvpermi_w(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __builtin_lasx_xvpermi_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvpermi_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvpermi_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvpermi_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvpermi_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvpermi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvpermi_d(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvpermi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpermi_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvpermi_q(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __builtin_lasx_xvpermi_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvpermi_q(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvpermi_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvpermi_q' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsllwil_h_b(v32i8 _1, int var) { ++ v16i16 res = __builtin_lasx_xvsllwil_h_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvsllwil_h_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvsllwil_h_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_h_b' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsllwil_w_h(v16i16 _1, int var) { ++ v8i32 res = __builtin_lasx_xvsllwil_w_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsllwil_w_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsllwil_w_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_w_h' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsllwil_d_w(v8i32 _1, int var) { ++ v4i64 res = __builtin_lasx_xvsllwil_d_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsllwil_d_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsllwil_d_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_d_w' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvsllwil_hu_bu(v32u8 _1, int var) { ++ v16u16 res = __builtin_lasx_xvsllwil_hu_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the 
valid range [0, 7]}} ++ res |= __builtin_lasx_xvsllwil_hu_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvsllwil_hu_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_hu_bu' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvsllwil_wu_hu(v16u16 _1, int var) { ++ v8u32 res = __builtin_lasx_xvsllwil_wu_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsllwil_wu_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsllwil_wu_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_wu_hu' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvsllwil_du_wu(v8u32 _1, int var) { ++ v4u64 res = __builtin_lasx_xvsllwil_du_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsllwil_du_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsllwil_du_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_du_wu' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __builtin_lasx_xvfrstpi_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvfrstpi_b(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvfrstpi_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvfrstpi_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __builtin_lasx_xvfrstpi_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvfrstpi_h(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvfrstpi_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvfrstpi_h' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvbsrl_v(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvbsrl_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvbsrl_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvbsrl_v(_1, var); // expected-error {{argument to '__builtin_lasx_xvbsrl_v' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvbsll_v(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvbsll_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvbsll_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvbsll_v(_1, var); // expected-error {{argument to '__builtin_lasx_xvbsll_v' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvextrins_b(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __builtin_lasx_xvextrins_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvextrins_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvextrins_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_b' must be a constant integer}} ++ return res; ++} 
++ ++v16i16 xvextrins_h(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __builtin_lasx_xvextrins_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvextrins_h(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvextrins_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvextrins_w(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __builtin_lasx_xvextrins_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvextrins_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvextrins_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvextrins_d(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __builtin_lasx_xvextrins_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvextrins_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvextrins_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvld(void *_1, int var) { ++ v32i8 res = __builtin_lasx_xvld(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ res |= __builtin_lasx_xvld(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} ++ res |= __builtin_lasx_xvld(_1, var); // expected-error {{argument to '__builtin_lasx_xvld' must be a constant integer}} ++ return res; ++} ++ ++void xvst(v32i8 _1, void *_2, int var) { ++ __builtin_lasx_xvst(_1, _2, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ __builtin_lasx_xvst(_1, _2, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} ++ __builtin_lasx_xvst(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvst' must be a constant integer}} ++} ++ ++void xvstelm_b(v32i8 _1, void * _2, int var) { ++ __builtin_lasx_xvstelm_b(_1, _2, -129, 1); // expected-error {{argument value -129 is outside the valid range [-128, 127]}} ++ __builtin_lasx_xvstelm_b(_1, _2, 128, 1); // expected-error {{argument value 128 is outside the valid range [-128, 127]}} ++ __builtin_lasx_xvstelm_b(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_b' must be a constant integer}} ++} ++ ++void xvstelm_h(v16i16 _1, void * _2, int var) { ++ __builtin_lasx_xvstelm_h(_1, _2, -258, 1); // expected-error {{argument value -258 is outside the valid range [-256, 254]}} ++ __builtin_lasx_xvstelm_h(_1, _2, 256, 1); // expected-error {{argument value 256 is outside the valid range [-256, 254]}} ++ __builtin_lasx_xvstelm_h(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_h' must be a constant integer}} ++} ++ ++void xvstelm_w(v8i32 _1, void * _2, int var) { ++ __builtin_lasx_xvstelm_w(_1, _2, -516, 1); // expected-error {{argument value -516 is outside the valid range [-512, 508]}} ++ __builtin_lasx_xvstelm_w(_1, _2, 512, 1); // expected-error {{argument value 512 is outside the valid range [-512, 508]}} ++ __builtin_lasx_xvstelm_w(_1, _2, var, 1); // expected-error 
{{argument to '__builtin_lasx_xvstelm_w' must be a constant integer}} ++} ++ ++void xvstelm_d(v4i64 _1, void * _2, int var) { ++ __builtin_lasx_xvstelm_d(_1, _2, -1032, 1); // expected-error {{argument value -1032 is outside the valid range [-1024, 1016]}} ++ __builtin_lasx_xvstelm_d(_1, _2, 1024, 1); // expected-error {{argument value 1024 is outside the valid range [-1024, 1016]}} ++ __builtin_lasx_xvstelm_d(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_d' must be a constant integer}} ++} ++ ++void xvstelm_b_idx(v32i8 _1, void * _2, int var) { ++ __builtin_lasx_xvstelm_b(_1, _2, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ __builtin_lasx_xvstelm_b(_1, _2, 1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ __builtin_lasx_xvstelm_b(_1, _2, 1, var); // expected-error {{argument to '__builtin_lasx_xvstelm_b' must be a constant integer}} ++} ++ ++void xvstelm_h_idx(v16i16 _1, void * _2, int var) { ++ __builtin_lasx_xvstelm_h(_1, _2, 2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ __builtin_lasx_xvstelm_h(_1, _2, 2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ __builtin_lasx_xvstelm_h(_1, _2, 2, var); // expected-error {{argument to '__builtin_lasx_xvstelm_h' must be a constant integer}} ++} ++ ++void xvstelm_w_idx(v8i32 _1, void * _2, int var) { ++ __builtin_lasx_xvstelm_w(_1, _2, 4, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ __builtin_lasx_xvstelm_w(_1, _2, 4, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ __builtin_lasx_xvstelm_w(_1, _2, 4, var); // expected-error {{argument to '__builtin_lasx_xvstelm_w' must be a constant integer}} ++} ++ ++void xvstelm_d_idx(v4i64 _1, void * _2, int var) { ++ __builtin_lasx_xvstelm_d(_1, _2, 8, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ __builtin_lasx_xvstelm_d(_1, _2, 8, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ __builtin_lasx_xvstelm_d(_1, _2, 8, var); // expected-error {{argument to '__builtin_lasx_xvstelm_d' must be a constant integer}} ++} ++ ++v8i32 xvinsve0_w(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __builtin_lasx_xvinsve0_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvinsve0_w(_1, _2, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvinsve0_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvinsve0_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvinsve0_d(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __builtin_lasx_xvinsve0_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __builtin_lasx_xvinsve0_d(_1, _2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __builtin_lasx_xvinsve0_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvinsve0_d' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvpickve_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvpickve_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvpickve_w(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvpickve_w(_1, var); // 
expected-error {{argument to '__builtin_lasx_xvpickve_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvpickve_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvpickve_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __builtin_lasx_xvpickve_d(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __builtin_lasx_xvpickve_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvldi(int var) { ++ v4i64 res = __builtin_lasx_xvldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}} ++ res |= __builtin_lasx_xvldi(4096); // expected-error {{argument value 4096 is outside the valid range [-4096, 4095]}} ++ res |= __builtin_lasx_xvldi(var); // expected-error {{argument to '__builtin_lasx_xvldi' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvinsgr2vr_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvinsgr2vr_w(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvinsgr2vr_w(_1, 1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvinsgr2vr_w(_1, 1, var); // expected-error {{argument to '__builtin_lasx_xvinsgr2vr_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvinsgr2vr_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvinsgr2vr_d(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __builtin_lasx_xvinsgr2vr_d(_1, 1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __builtin_lasx_xvinsgr2vr_d(_1, 1, var); // expected-error {{argument to '__builtin_lasx_xvinsgr2vr_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvldrepl_b(void *_1, int var) { ++ v32i8 res = __builtin_lasx_xvldrepl_b(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ res |= __builtin_lasx_xvldrepl_b(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} ++ res |= __builtin_lasx_xvldrepl_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvldrepl_h(void *_1, int var) { ++ v16i16 res = __builtin_lasx_xvldrepl_h(_1, -2050); // expected-error {{argument value -2050 is outside the valid range [-2048, 2046]}} ++ res |= __builtin_lasx_xvldrepl_h(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2046]}} ++ res |= __builtin_lasx_xvldrepl_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvldrepl_w(void *_1, int var) { ++ v8i32 res = __builtin_lasx_xvldrepl_w(_1, -2052); // expected-error {{argument value -2052 is outside the valid range [-2048, 2044]}} ++ res |= __builtin_lasx_xvldrepl_w(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2044]}} ++ res |= __builtin_lasx_xvldrepl_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvldrepl_d(void *_1, int var) { ++ v4i64 res = __builtin_lasx_xvldrepl_d(_1, -2056); // expected-error {{argument value -2056 is outside the valid range [-2048, 2040]}} ++ res |= __builtin_lasx_xvldrepl_d(_1, 2048); // expected-error {{argument value 
2048 is outside the valid range [-2048, 2040]}} ++ res |= __builtin_lasx_xvldrepl_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_d' must be a constant integer}} ++ return res; ++} ++ ++int xvpickve2gr_w(v8i32 _1, int var) { ++ int res = __builtin_lasx_xvpickve2gr_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvpickve2gr_w(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvpickve2gr_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_w' must be a constant integer}} ++ return res; ++} ++ ++unsigned int xvpickve2gr_wu(v8i32 _1, int var) { ++ unsigned int res = __builtin_lasx_xvpickve2gr_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvpickve2gr_wu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvpickve2gr_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_wu' must be a constant integer}} ++ return res; ++} ++ ++long xvpickve2gr_d(v4i64 _1, int var) { ++ long res = __builtin_lasx_xvpickve2gr_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __builtin_lasx_xvpickve2gr_d(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __builtin_lasx_xvpickve2gr_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_d' must be a constant integer}} ++ return res; ++} ++ ++unsigned long int xvpickve2gr_du(v4i64 _1, int var) { ++ unsigned long int res = __builtin_lasx_xvpickve2gr_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __builtin_lasx_xvpickve2gr_du(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __builtin_lasx_xvpickve2gr_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvrotri_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvrotri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvrotri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvrotri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvrotri_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvrotri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvrotri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvrotri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvrotri_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvrotri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvrotri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvrotri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvrotri_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvrotri_d(_1, -1); // expected-error {{argument value 
4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvrotri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvrotri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __builtin_lasx_xvsrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __builtin_lasx_xvsrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __builtin_lasx_xvsrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __builtin_lasx_xvsrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvsrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvsrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __builtin_lasx_xvsrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __builtin_lasx_xvsrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __builtin_lasx_xvsrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= 
__builtin_lasx_xvsrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __builtin_lasx_xvsrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvsrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvsrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __builtin_lasx_xvssrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvssrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvssrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __builtin_lasx_xvssrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvssrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvssrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __builtin_lasx_xvssrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvssrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvssrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __builtin_lasx_xvssrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvssrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvssrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2, int var) { ++ v32u8 res = __builtin_lasx_xvssrlni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvssrlni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvssrlni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2, int var) { ++ v16u16 res = __builtin_lasx_xvssrlni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvssrlni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvssrlni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_hu_w' must be a constant integer}} ++ return res; ++} ++ 
++v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2, int var) { ++ v8u32 res = __builtin_lasx_xvssrlni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvssrlni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvssrlni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2, int var) { ++ v4u64 res = __builtin_lasx_xvssrlni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvssrlni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvssrlni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_du_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __builtin_lasx_xvssrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvssrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvssrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __builtin_lasx_xvssrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvssrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvssrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __builtin_lasx_xvssrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvssrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvssrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __builtin_lasx_xvssrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvssrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvssrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2, int var) { ++ v32u8 res = __builtin_lasx_xvssrlrni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvssrlrni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvssrlrni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2, int var) { ++ v16u16 res = __builtin_lasx_xvssrlrni_hu_w(_1, _2, -1); // 
expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvssrlrni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvssrlrni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2, int var) { ++ v8u32 res = __builtin_lasx_xvssrlrni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvssrlrni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvssrlrni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2, int var) { ++ v4u64 res = __builtin_lasx_xvssrlrni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvssrlrni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvssrlrni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_du_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __builtin_lasx_xvsrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __builtin_lasx_xvsrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __builtin_lasx_xvsrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __builtin_lasx_xvsrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvsrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvsrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __builtin_lasx_xvsrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsrarni_b_h(_1, _2, 16); // expected-error {{argument 
value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __builtin_lasx_xvsrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __builtin_lasx_xvsrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __builtin_lasx_xvsrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvsrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvsrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __builtin_lasx_xvssrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvssrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvssrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __builtin_lasx_xvssrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvssrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvssrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __builtin_lasx_xvssrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvssrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvssrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __builtin_lasx_xvssrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvssrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvssrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_d_q' must 
be a constant integer}} ++ return res; ++} ++ ++v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2, int var) { ++ v32u8 res = __builtin_lasx_xvssrani_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvssrani_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvssrani_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2, int var) { ++ v16u16 res = __builtin_lasx_xvssrani_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvssrani_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvssrani_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2, int var) { ++ v8u32 res = __builtin_lasx_xvssrani_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvssrani_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvssrani_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2, int var) { ++ v4u64 res = __builtin_lasx_xvssrani_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvssrani_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvssrani_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_du_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __builtin_lasx_xvssrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvssrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvssrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __builtin_lasx_xvssrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvssrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvssrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __builtin_lasx_xvssrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvssrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvssrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = 
__builtin_lasx_xvssrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvssrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvssrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2, int var) { ++ v32u8 res = __builtin_lasx_xvssrarni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvssrarni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvssrarni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2, int var) { ++ v16u16 res = __builtin_lasx_xvssrarni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvssrarni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvssrarni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2, int var) { ++ v8u32 res = __builtin_lasx_xvssrarni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvssrarni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvssrarni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2, int var) { ++ v4u64 res = __builtin_lasx_xvssrarni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvssrarni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvssrarni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_du_q' must be a constant integer}} ++ return res; ++} ++ ++v4f64 xvpickve_d_f(v4f64 _1, int var) { ++ v4f64 res = __builtin_lasx_xvpickve_d_f(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res += __builtin_lasx_xvpickve_d_f(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res += __builtin_lasx_xvpickve_d_f(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_d_f' must be a constant integer}} ++ return res; ++} ++ ++v8f32 xvpickve_w_f(v8f32 _1, int var) { ++ v8f32 res = __builtin_lasx_xvpickve_w_f(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res += __builtin_lasx_xvpickve_w_f(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res += __builtin_lasx_xvpickve_w_f(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_w_f' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvrepli_b(int var) { ++ v32i8 res = __builtin_lasx_xvrepli_b(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __builtin_lasx_xvrepli_b(512); // expected-error {{argument value 512 is 
outside the valid range [-512, 511]}} ++ res |= __builtin_lasx_xvrepli_b(var); // expected-error {{argument to '__builtin_lasx_xvrepli_b' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvrepli_d(int var) { ++ v4i64 res = __builtin_lasx_xvrepli_d(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __builtin_lasx_xvrepli_d(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __builtin_lasx_xvrepli_d(var); // expected-error {{argument to '__builtin_lasx_xvrepli_d' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvrepli_h(int var) { ++ v16i16 res = __builtin_lasx_xvrepli_h(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __builtin_lasx_xvrepli_h(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __builtin_lasx_xvrepli_h(var); // expected-error {{argument to '__builtin_lasx_xvrepli_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvrepli_w(int var) { ++ v8i32 res = __builtin_lasx_xvrepli_w(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __builtin_lasx_xvrepli_w(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __builtin_lasx_xvrepli_w(var); // expected-error {{argument to '__builtin_lasx_xvrepli_w' must be a constant integer}} ++ return res; ++} +diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin.c b/clang/test/CodeGen/LoongArch/lasx/builtin.c +new file mode 100644 +index 000000000000..0185f2004d52 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/lasx/builtin.c +@@ -0,0 +1,4452 @@ ++// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -O2 -emit-llvm %s -o - | FileCheck %s ++ ++typedef signed char v32i8 __attribute__((vector_size(32), aligned(32))); ++typedef signed char v32i8_b __attribute__((vector_size(32), aligned(1))); ++typedef unsigned char v32u8 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned char v32u8_b __attribute__((vector_size(32), aligned(1))); ++typedef short v16i16 __attribute__((vector_size(32), aligned(32))); ++typedef short v16i16_h __attribute__((vector_size(32), aligned(2))); ++typedef unsigned short v16u16 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned short v16u16_h __attribute__((vector_size(32), aligned(2))); ++typedef int v8i32 __attribute__((vector_size(32), aligned(32))); ++typedef int v8i32_w __attribute__((vector_size(32), aligned(4))); ++typedef unsigned int v8u32 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned int v8u32_w __attribute__((vector_size(32), aligned(4))); ++typedef long long v4i64 __attribute__((vector_size(32), aligned(32))); ++typedef long long v4i64_d __attribute__((vector_size(32), aligned(8))); ++typedef unsigned long long v4u64 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned long long v4u64_d __attribute__((vector_size(32), aligned(8))); ++typedef float v8f32 __attribute__((vector_size(32), aligned(32))); ++typedef float v8f32_w __attribute__((vector_size(32), aligned(4))); ++typedef double v4f64 __attribute__((vector_size(32), aligned(32))); ++typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); ++ ++typedef double v4f64 __attribute__((vector_size(32), aligned(32))); ++typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); ++ ++// 
CHECK-LABEL: @xvsll_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsll_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsll_b(_1, _2); } ++// CHECK-LABEL: @xvsll_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsll_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsll_h(_1, _2); } ++// CHECK-LABEL: @xvsll_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsll_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsll_w(_1, _2); } ++// CHECK-LABEL: @xvsll_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsll_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsll_d(_1, _2); } ++// CHECK-LABEL: @xvslli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvslli_b(v32i8 _1) { return __builtin_lasx_xvslli_b(_1, 1); } ++// CHECK-LABEL: @xvslli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvslli_h(v16i16 _1) { return __builtin_lasx_xvslli_h(_1, 1); } ++// CHECK-LABEL: @xvslli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvslli_w(v8i32 _1) { return __builtin_lasx_xvslli_w(_1, 1); } ++// CHECK-LABEL: @xvslli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvslli_d(v4i64 _1) { return __builtin_lasx_xvslli_d(_1, 1); } ++// CHECK-LABEL: @xvsra_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsra_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsra_b(_1, _2); } ++// CHECK-LABEL: @xvsra_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsra_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsra_h(_1, _2); } ++// CHECK-LABEL: @xvsra_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsra_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsra_w(_1, _2); } ++// CHECK-LABEL: @xvsra_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsra_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsra_d(_1, _2); } ++// 
CHECK-LABEL: @xvsrai_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrai_b(v32i8 _1) { return __builtin_lasx_xvsrai_b(_1, 1); } ++// CHECK-LABEL: @xvsrai_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrai_h(v16i16 _1) { return __builtin_lasx_xvsrai_h(_1, 1); } ++// CHECK-LABEL: @xvsrai_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrai_w(v8i32 _1) { return __builtin_lasx_xvsrai_w(_1, 1); } ++// CHECK-LABEL: @xvsrai_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrai_d(v4i64 _1) { return __builtin_lasx_xvsrai_d(_1, 1); } ++// CHECK-LABEL: @xvsrar_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrar_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrar_b(_1, _2); } ++// CHECK-LABEL: @xvsrar_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrar_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrar_h(_1, _2); } ++// CHECK-LABEL: @xvsrar_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrar_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrar_w(_1, _2); } ++// CHECK-LABEL: @xvsrar_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrar_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrar_d(_1, _2); } ++// CHECK-LABEL: @xvsrari_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrari_b(v32i8 _1) { return __builtin_lasx_xvsrari_b(_1, 1); } ++// CHECK-LABEL: @xvsrari_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrari_h(v16i16 _1) { return __builtin_lasx_xvsrari_h(_1, 1); } ++// CHECK-LABEL: @xvsrari_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrari_w(v8i32 _1) { return __builtin_lasx_xvsrari_w(_1, 1); } ++// CHECK-LABEL: @xvsrari_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrari_d(v4i64 _1) { return __builtin_lasx_xvsrari_d(_1, 1); } ++// CHECK-LABEL: @xvsrl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrl_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrl_b(_1, _2); } ++// CHECK-LABEL: @xvsrl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrl_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrl_h(_1, _2); } ++// CHECK-LABEL: @xvsrl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrl_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrl_w(_1, _2); } ++// CHECK-LABEL: @xvsrl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrl_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrl_d(_1, _2); } ++// CHECK-LABEL: @xvsrli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrli_b(v32i8 _1) { return __builtin_lasx_xvsrli_b(_1, 1); } ++// CHECK-LABEL: @xvsrli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrli_h(v16i16 _1) { return __builtin_lasx_xvsrli_h(_1, 1); } ++// CHECK-LABEL: @xvsrli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrli_w(v8i32 _1) { return __builtin_lasx_xvsrli_w(_1, 1); } ++// CHECK-LABEL: @xvsrli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrli_d(v4i64 _1) { return __builtin_lasx_xvsrli_d(_1, 1); } ++// CHECK-LABEL: @xvsrlr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrlr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlr_b(_1, _2); } ++// CHECK-LABEL: @xvsrlr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrlr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlr_h(_1, _2); } ++// CHECK-LABEL: @xvsrlr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrlr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlr_w(_1, _2); } ++// CHECK-LABEL: @xvsrlr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrlr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlr_d(_1, _2); } ++// CHECK-LABEL: @xvsrlri_b( ++// CHECK-NEXT: entry: ++// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrlri_b(v32i8 _1) { return __builtin_lasx_xvsrlri_b(_1, 1); } ++// CHECK-LABEL: @xvsrlri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrlri_h(v16i16 _1) { return __builtin_lasx_xvsrlri_h(_1, 1); } ++// CHECK-LABEL: @xvsrlri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrlri_w(v8i32 _1) { return __builtin_lasx_xvsrlri_w(_1, 1); } ++// CHECK-LABEL: @xvsrlri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrlri_d(v4i64 _1) { return __builtin_lasx_xvsrlri_d(_1, 1); } ++// CHECK-LABEL: @xvbitclr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvbitclr_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitclr_b(_1, _2); } ++// CHECK-LABEL: @xvbitclr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvbitclr_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitclr_h(_1, _2); } ++// CHECK-LABEL: @xvbitclr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvbitclr_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitclr_w(_1, _2); } ++// CHECK-LABEL: @xvbitclr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvbitclr_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitclr_d(_1, _2); } ++// CHECK-LABEL: @xvbitclri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvbitclri_b(v32u8 _1) { return __builtin_lasx_xvbitclri_b(_1, 1); } ++// CHECK-LABEL: @xvbitclri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvbitclri_h(v16u16 _1) { return __builtin_lasx_xvbitclri_h(_1, 1); } ++// CHECK-LABEL: @xvbitclri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvbitclri_w(v8u32 _1) { return __builtin_lasx_xvbitclri_w(_1, 1); } ++// CHECK-LABEL: @xvbitclri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvbitclri_d(v4u64 _1) { return __builtin_lasx_xvbitclri_d(_1, 1); } ++// CHECK-LABEL: @xvbitset_b( ++// CHECK-NEXT: 
entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvbitset_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitset_b(_1, _2); } ++// CHECK-LABEL: @xvbitset_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvbitset_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitset_h(_1, _2); } ++// CHECK-LABEL: @xvbitset_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvbitset_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitset_w(_1, _2); } ++// CHECK-LABEL: @xvbitset_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvbitset_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitset_d(_1, _2); } ++// CHECK-LABEL: @xvbitseti_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvbitseti_b(v32u8 _1) { return __builtin_lasx_xvbitseti_b(_1, 1); } ++// CHECK-LABEL: @xvbitseti_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvbitseti_h(v16u16 _1) { return __builtin_lasx_xvbitseti_h(_1, 1); } ++// CHECK-LABEL: @xvbitseti_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvbitseti_w(v8u32 _1) { return __builtin_lasx_xvbitseti_w(_1, 1); } ++// CHECK-LABEL: @xvbitseti_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvbitseti_d(v4u64 _1) { return __builtin_lasx_xvbitseti_d(_1, 1); } ++// CHECK-LABEL: @xvbitrev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvbitrev_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitrev_b(_1, _2); } ++// CHECK-LABEL: @xvbitrev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvbitrev_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitrev_h(_1, _2); } ++// CHECK-LABEL: @xvbitrev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvbitrev_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitrev_w(_1, _2); } ++// CHECK-LABEL: @xvbitrev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// 
++v4u64 xvbitrev_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitrev_d(_1, _2); } ++// CHECK-LABEL: @xvbitrevi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvbitrevi_b(v32u8 _1) { return __builtin_lasx_xvbitrevi_b(_1, 1); } ++// CHECK-LABEL: @xvbitrevi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvbitrevi_h(v16u16 _1) { return __builtin_lasx_xvbitrevi_h(_1, 1); } ++// CHECK-LABEL: @xvbitrevi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvbitrevi_w(v8u32 _1) { return __builtin_lasx_xvbitrevi_w(_1, 1); } ++// CHECK-LABEL: @xvbitrevi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvbitrevi_d(v4u64 _1) { return __builtin_lasx_xvbitrevi_d(_1, 1); } ++// CHECK-LABEL: @xvadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvadd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvadd_b(_1, _2); } ++// CHECK-LABEL: @xvadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvadd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvadd_h(_1, _2); } ++// CHECK-LABEL: @xvadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvadd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvadd_w(_1, _2); } ++// CHECK-LABEL: @xvadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvadd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadd_d(_1, _2); } ++// CHECK-LABEL: @xvaddi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvaddi_bu(v32i8 _1) { return __builtin_lasx_xvaddi_bu(_1, 1); } ++// CHECK-LABEL: @xvaddi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvaddi_hu(v16i16 _1) { return __builtin_lasx_xvaddi_hu(_1, 1); } ++// CHECK-LABEL: @xvaddi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvaddi_wu(v8i32 _1) { return __builtin_lasx_xvaddi_wu(_1, 1); } ++// CHECK-LABEL: @xvaddi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddi_du(v4i64 
_1) { return __builtin_lasx_xvaddi_du(_1, 1); } ++// CHECK-LABEL: @xvsub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsub_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsub_b(_1, _2); } ++// CHECK-LABEL: @xvsub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsub_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsub_h(_1, _2); } ++// CHECK-LABEL: @xvsub_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsub_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsub_w(_1, _2); } ++// CHECK-LABEL: @xvsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsub_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsub_d(_1, _2); } ++// CHECK-LABEL: @xvsubi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsubi_bu(v32i8 _1) { return __builtin_lasx_xvsubi_bu(_1, 1); } ++// CHECK-LABEL: @xvsubi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsubi_hu(v16i16 _1) { return __builtin_lasx_xvsubi_hu(_1, 1); } ++// CHECK-LABEL: @xvsubi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsubi_wu(v8i32 _1) { return __builtin_lasx_xvsubi_wu(_1, 1); } ++// CHECK-LABEL: @xvsubi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubi_du(v4i64 _1) { return __builtin_lasx_xvsubi_du(_1, 1); } ++// CHECK-LABEL: @xvmax_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmax_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmax_b(_1, _2); } ++// CHECK-LABEL: @xvmax_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmax_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmax_h(_1, _2); } ++// CHECK-LABEL: @xvmax_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmax_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmax_w(_1, _2); } ++// CHECK-LABEL: @xvmax_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmax_d(v4i64 _1, 
v4i64 _2) { return __builtin_lasx_xvmax_d(_1, _2); } ++// CHECK-LABEL: @xvmaxi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmaxi_b(v32i8 _1) { return __builtin_lasx_xvmaxi_b(_1, 1); } ++// CHECK-LABEL: @xvmaxi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmaxi_h(v16i16 _1) { return __builtin_lasx_xvmaxi_h(_1, 1); } ++// CHECK-LABEL: @xvmaxi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmaxi_w(v8i32 _1) { return __builtin_lasx_xvmaxi_w(_1, 1); } ++// CHECK-LABEL: @xvmaxi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaxi_d(v4i64 _1) { return __builtin_lasx_xvmaxi_d(_1, 1); } ++// CHECK-LABEL: @xvmax_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvmax_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmax_bu(_1, _2); } ++// CHECK-LABEL: @xvmax_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvmax_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmax_hu(_1, _2); } ++// CHECK-LABEL: @xvmax_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvmax_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmax_wu(_1, _2); } ++// CHECK-LABEL: @xvmax_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmax_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmax_du(_1, _2); } ++// CHECK-LABEL: @xvmaxi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvmaxi_bu(v32u8 _1) { return __builtin_lasx_xvmaxi_bu(_1, 1); } ++// CHECK-LABEL: @xvmaxi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvmaxi_hu(v16u16 _1) { return __builtin_lasx_xvmaxi_hu(_1, 1); } ++// CHECK-LABEL: @xvmaxi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvmaxi_wu(v8u32 _1) { return __builtin_lasx_xvmaxi_wu(_1, 1); } ++// CHECK-LABEL: @xvmaxi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmaxi_du(v4u64 _1) { return __builtin_lasx_xvmaxi_du(_1, 1); } ++// 
CHECK-LABEL: @xvmin_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmin_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmin_b(_1, _2); } ++// CHECK-LABEL: @xvmin_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmin_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmin_h(_1, _2); } ++// CHECK-LABEL: @xvmin_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmin_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmin_w(_1, _2); } ++// CHECK-LABEL: @xvmin_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmin_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmin_d(_1, _2); } ++// CHECK-LABEL: @xvmini_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmini_b(v32i8 _1) { return __builtin_lasx_xvmini_b(_1, 1); } ++// CHECK-LABEL: @xvmini_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmini_h(v16i16 _1) { return __builtin_lasx_xvmini_h(_1, 1); } ++// CHECK-LABEL: @xvmini_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmini_w(v8i32 _1) { return __builtin_lasx_xvmini_w(_1, 1); } ++// CHECK-LABEL: @xvmini_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmini_d(v4i64 _1) { return __builtin_lasx_xvmini_d(_1, 1); } ++// CHECK-LABEL: @xvmin_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvmin_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmin_bu(_1, _2); } ++// CHECK-LABEL: @xvmin_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvmin_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmin_hu(_1, _2); } ++// CHECK-LABEL: @xvmin_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvmin_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmin_wu(_1, _2); } ++// CHECK-LABEL: @xvmin_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmin_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmin_du(_1, _2); } 
++// CHECK-LABEL: @xvmini_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvmini_bu(v32u8 _1) { return __builtin_lasx_xvmini_bu(_1, 1); } ++// CHECK-LABEL: @xvmini_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvmini_hu(v16u16 _1) { return __builtin_lasx_xvmini_hu(_1, 1); } ++// CHECK-LABEL: @xvmini_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvmini_wu(v8u32 _1) { return __builtin_lasx_xvmini_wu(_1, 1); } ++// CHECK-LABEL: @xvmini_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmini_du(v4u64 _1) { return __builtin_lasx_xvmini_du(_1, 1); } ++// CHECK-LABEL: @xvseq_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvseq_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvseq_b(_1, _2); } ++// CHECK-LABEL: @xvseq_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvseq_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvseq_h(_1, _2); } ++// CHECK-LABEL: @xvseq_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvseq_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvseq_w(_1, _2); } ++// CHECK-LABEL: @xvseq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvseq_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvseq_d(_1, _2); } ++// CHECK-LABEL: @xvseqi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvseqi_b(v32i8 _1) { return __builtin_lasx_xvseqi_b(_1, 1); } ++// CHECK-LABEL: @xvseqi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvseqi_h(v16i16 _1) { return __builtin_lasx_xvseqi_h(_1, 1); } ++// CHECK-LABEL: @xvseqi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvseqi_w(v8i32 _1) { return __builtin_lasx_xvseqi_w(_1, 1); } ++// CHECK-LABEL: @xvseqi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvseqi_d(v4i64 _1) { return __builtin_lasx_xvseqi_d(_1, 1); } ++// CHECK-LABEL: @xvslt_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = 
tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvslt_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvslt_b(_1, _2); } ++// CHECK-LABEL: @xvslt_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvslt_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvslt_h(_1, _2); } ++// CHECK-LABEL: @xvslt_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvslt_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvslt_w(_1, _2); } ++// CHECK-LABEL: @xvslt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvslt_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvslt_d(_1, _2); } ++// CHECK-LABEL: @xvslti_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvslti_b(v32i8 _1) { return __builtin_lasx_xvslti_b(_1, 1); } ++// CHECK-LABEL: @xvslti_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvslti_h(v16i16 _1) { return __builtin_lasx_xvslti_h(_1, 1); } ++// CHECK-LABEL: @xvslti_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvslti_w(v8i32 _1) { return __builtin_lasx_xvslti_w(_1, 1); } ++// CHECK-LABEL: @xvslti_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvslti_d(v4i64 _1) { return __builtin_lasx_xvslti_d(_1, 1); } ++// CHECK-LABEL: @xvslt_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvslt_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvslt_bu(_1, _2); } ++// CHECK-LABEL: @xvslt_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvslt_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvslt_hu(_1, _2); } ++// CHECK-LABEL: @xvslt_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvslt_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvslt_wu(_1, _2); } ++// CHECK-LABEL: @xvslt_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvslt_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvslt_du(_1, _2); } ++// CHECK-LABEL: @xvslti_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvslti_bu(v32u8 _1) { return __builtin_lasx_xvslti_bu(_1, 1); } ++// CHECK-LABEL: @xvslti_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvslti_hu(v16u16 _1) { return __builtin_lasx_xvslti_hu(_1, 1); } ++// CHECK-LABEL: @xvslti_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvslti_wu(v8u32 _1) { return __builtin_lasx_xvslti_wu(_1, 1); } ++// CHECK-LABEL: @xvslti_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvslti_du(v4u64 _1) { return __builtin_lasx_xvslti_du(_1, 1); } ++// CHECK-LABEL: @xvsle_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsle_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsle_b(_1, _2); } ++// CHECK-LABEL: @xvsle_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsle_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsle_h(_1, _2); } ++// CHECK-LABEL: @xvsle_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsle_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsle_w(_1, _2); } ++// CHECK-LABEL: @xvsle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsle_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsle_d(_1, _2); } ++// CHECK-LABEL: @xvslei_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvslei_b(v32i8 _1) { return __builtin_lasx_xvslei_b(_1, 1); } ++// CHECK-LABEL: @xvslei_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvslei_h(v16i16 _1) { return __builtin_lasx_xvslei_h(_1, 1); } ++// CHECK-LABEL: @xvslei_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvslei_w(v8i32 _1) { return __builtin_lasx_xvslei_w(_1, 1); } ++// CHECK-LABEL: @xvslei_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvslei_d(v4i64 _1) { return __builtin_lasx_xvslei_d(_1, 1); } ++// CHECK-LABEL: @xvsle_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> [[_1:%.*]], 
<32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsle_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsle_bu(_1, _2); } ++// CHECK-LABEL: @xvsle_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsle_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsle_hu(_1, _2); } ++// CHECK-LABEL: @xvsle_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsle_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsle_wu(_1, _2); } ++// CHECK-LABEL: @xvsle_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsle_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsle_du(_1, _2); } ++// CHECK-LABEL: @xvslei_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvslei_bu(v32u8 _1) { return __builtin_lasx_xvslei_bu(_1, 1); } ++// CHECK-LABEL: @xvslei_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvslei_hu(v16u16 _1) { return __builtin_lasx_xvslei_hu(_1, 1); } ++// CHECK-LABEL: @xvslei_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvslei_wu(v8u32 _1) { return __builtin_lasx_xvslei_wu(_1, 1); } ++// CHECK-LABEL: @xvslei_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvslei_du(v4u64 _1) { return __builtin_lasx_xvslei_du(_1, 1); } ++// CHECK-LABEL: @xvsat_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsat_b(v32i8 _1) { return __builtin_lasx_xvsat_b(_1, 1); } ++// CHECK-LABEL: @xvsat_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsat_h(v16i16 _1) { return __builtin_lasx_xvsat_h(_1, 1); } ++// CHECK-LABEL: @xvsat_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsat_w(v8i32 _1) { return __builtin_lasx_xvsat_w(_1, 1); } ++// CHECK-LABEL: @xvsat_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsat_d(v4i64 _1) { return __builtin_lasx_xvsat_d(_1, 1); } ++// CHECK-LABEL: @xvsat_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvsat_bu(v32u8 _1) { 
return __builtin_lasx_xvsat_bu(_1, 1); } ++// CHECK-LABEL: @xvsat_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvsat_hu(v16u16 _1) { return __builtin_lasx_xvsat_hu(_1, 1); } ++// CHECK-LABEL: @xvsat_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvsat_wu(v8u32 _1) { return __builtin_lasx_xvsat_wu(_1, 1); } ++// CHECK-LABEL: @xvsat_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvsat_du(v4u64 _1) { return __builtin_lasx_xvsat_du(_1, 1); } ++// CHECK-LABEL: @xvadda_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvadda_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvadda_b(_1, _2); } ++// CHECK-LABEL: @xvadda_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvadda_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvadda_h(_1, _2); } ++// CHECK-LABEL: @xvadda_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvadda_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvadda_w(_1, _2); } ++// CHECK-LABEL: @xvadda_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvadda_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadda_d(_1, _2); } ++// CHECK-LABEL: @xvsadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsadd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsadd_b(_1, _2); } ++// CHECK-LABEL: @xvsadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsadd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsadd_h(_1, _2); } ++// CHECK-LABEL: @xvsadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsadd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsadd_w(_1, _2); } ++// CHECK-LABEL: @xvsadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsadd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsadd_d(_1, _2); } ++// CHECK-LABEL: @xvsadd_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> 
[[TMP0]] ++// ++v32u8 xvsadd_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsadd_bu(_1, _2); } ++// CHECK-LABEL: @xvsadd_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvsadd_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsadd_hu(_1, _2); } ++// CHECK-LABEL: @xvsadd_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvsadd_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsadd_wu(_1, _2); } ++// CHECK-LABEL: @xvsadd_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvsadd_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsadd_du(_1, _2); } ++// CHECK-LABEL: @xvavg_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvavg_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvavg_b(_1, _2); } ++// CHECK-LABEL: @xvavg_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvavg_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvavg_h(_1, _2); } ++// CHECK-LABEL: @xvavg_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvavg_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvavg_w(_1, _2); } ++// CHECK-LABEL: @xvavg_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvavg_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvavg_d(_1, _2); } ++// CHECK-LABEL: @xvavg_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvavg_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvavg_bu(_1, _2); } ++// CHECK-LABEL: @xvavg_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvavg_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvavg_hu(_1, _2); } ++// CHECK-LABEL: @xvavg_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvavg_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvavg_wu(_1, _2); } ++// CHECK-LABEL: @xvavg_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvavg_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvavg_du(_1, _2); } ++// CHECK-LABEL: @xvavgr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = 
tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvavgr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvavgr_b(_1, _2); } ++// CHECK-LABEL: @xvavgr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvavgr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvavgr_h(_1, _2); } ++// CHECK-LABEL: @xvavgr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvavgr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvavgr_w(_1, _2); } ++// CHECK-LABEL: @xvavgr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvavgr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvavgr_d(_1, _2); } ++// CHECK-LABEL: @xvavgr_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvavgr_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvavgr_bu(_1, _2); } ++// CHECK-LABEL: @xvavgr_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvavgr_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvavgr_hu(_1, _2); } ++// CHECK-LABEL: @xvavgr_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvavgr_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvavgr_wu(_1, _2); } ++// CHECK-LABEL: @xvavgr_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvavgr_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvavgr_du(_1, _2); } ++// CHECK-LABEL: @xvssub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssub_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssub_b(_1, _2); } ++// CHECK-LABEL: @xvssub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssub_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssub_h(_1, _2); } ++// CHECK-LABEL: @xvssub_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssub_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssub_w(_1, _2); } ++// CHECK-LABEL: @xvssub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvssub_d(v4i64 
_1, v4i64 _2) { return __builtin_lasx_xvssub_d(_1, _2); } ++// CHECK-LABEL: @xvssub_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssub_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvssub_bu(_1, _2); } ++// CHECK-LABEL: @xvssub_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssub_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssub_hu(_1, _2); } ++// CHECK-LABEL: @xvssub_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssub_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssub_wu(_1, _2); } ++// CHECK-LABEL: @xvssub_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvssub_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssub_du(_1, _2); } ++// CHECK-LABEL: @xvabsd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvabsd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvabsd_b(_1, _2); } ++// CHECK-LABEL: @xvabsd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvabsd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvabsd_h(_1, _2); } ++// CHECK-LABEL: @xvabsd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvabsd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvabsd_w(_1, _2); } ++// CHECK-LABEL: @xvabsd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvabsd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvabsd_d(_1, _2); } ++// CHECK-LABEL: @xvabsd_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvabsd_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvabsd_bu(_1, _2); } ++// CHECK-LABEL: @xvabsd_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvabsd_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvabsd_hu(_1, _2); } ++// CHECK-LABEL: @xvabsd_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvabsd_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvabsd_wu(_1, _2); } ++// CHECK-LABEL: @xvabsd_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail 
call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvabsd_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvabsd_du(_1, _2); } ++// CHECK-LABEL: @xvmul_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmul_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmul_b(_1, _2); } ++// CHECK-LABEL: @xvmul_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmul_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmul_h(_1, _2); } ++// CHECK-LABEL: @xvmul_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmul_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmul_w(_1, _2); } ++// CHECK-LABEL: @xvmul_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmul_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmul_d(_1, _2); } ++// CHECK-LABEL: @xvmadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmadd_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmadd_b(_1, _2, _3); } ++// CHECK-LABEL: @xvmadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmadd_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmadd_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmadd_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmadd_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmadd_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmadd_d(_1, _2, _3); } ++// CHECK-LABEL: @xvmsub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmsub_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmsub_b(_1, _2, _3); } ++// CHECK-LABEL: @xvmsub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmsub_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmsub_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmsub_w( ++// CHECK-NEXT: entry: ++// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmsub_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmsub_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmsub_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmsub_d(_1, _2, _3); } ++// CHECK-LABEL: @xvdiv_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvdiv_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvdiv_b(_1, _2); } ++// CHECK-LABEL: @xvdiv_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvdiv_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvdiv_h(_1, _2); } ++// CHECK-LABEL: @xvdiv_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvdiv_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvdiv_w(_1, _2); } ++// CHECK-LABEL: @xvdiv_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvdiv_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvdiv_d(_1, _2); } ++// CHECK-LABEL: @xvdiv_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvdiv_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvdiv_bu(_1, _2); } ++// CHECK-LABEL: @xvdiv_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvdiv_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvdiv_hu(_1, _2); } ++// CHECK-LABEL: @xvdiv_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvdiv_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvdiv_wu(_1, _2); } ++// CHECK-LABEL: @xvdiv_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvdiv_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvdiv_du(_1, _2); } ++// CHECK-LABEL: @xvhaddw_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvhaddw_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvhaddw_h_b(_1, _2); } ++// CHECK-LABEL: @xvhaddw_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> [[_1:%.*]], <16 x 
i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvhaddw_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvhaddw_w_h(_1, _2); } ++// CHECK-LABEL: @xvhaddw_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvhaddw_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvhaddw_d_w(_1, _2); } ++// CHECK-LABEL: @xvhaddw_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvhaddw_hu_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvhaddw_hu_bu(_1, _2); } ++// CHECK-LABEL: @xvhaddw_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvhaddw_wu_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvhaddw_wu_hu(_1, _2); } ++// CHECK-LABEL: @xvhaddw_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvhaddw_du_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvhaddw_du_wu(_1, _2); } ++// CHECK-LABEL: @xvhsubw_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvhsubw_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvhsubw_h_b(_1, _2); } ++// CHECK-LABEL: @xvhsubw_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvhsubw_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvhsubw_w_h(_1, _2); } ++// CHECK-LABEL: @xvhsubw_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvhsubw_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvhsubw_d_w(_1, _2); } ++// CHECK-LABEL: @xvhsubw_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvhsubw_hu_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvhsubw_hu_bu(_1, _2); } ++// CHECK-LABEL: @xvhsubw_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvhsubw_wu_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvhsubw_wu_hu(_1, _2); } ++// CHECK-LABEL: @xvhsubw_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvhsubw_du_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvhsubw_du_wu(_1, _2); } ++// CHECK-LABEL: @xvmod_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> [[_1:%.*]], <32 x i8> 
[[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmod_b(_1, _2); } ++// CHECK-LABEL: @xvmod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmod_h(_1, _2); } ++// CHECK-LABEL: @xvmod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmod_w(_1, _2); } ++// CHECK-LABEL: @xvmod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmod_d(_1, _2); } ++// CHECK-LABEL: @xvmod_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvmod_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmod_bu(_1, _2); } ++// CHECK-LABEL: @xvmod_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvmod_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmod_hu(_1, _2); } ++// CHECK-LABEL: @xvmod_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvmod_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmod_wu(_1, _2); } ++// CHECK-LABEL: @xvmod_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmod_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmod_du(_1, _2); } ++// CHECK-LABEL: @xvrepl128vei_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvrepl128vei_b(v32i8 _1) { return __builtin_lasx_xvrepl128vei_b(_1, 1); } ++// CHECK-LABEL: @xvrepl128vei_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvrepl128vei_h(v16i16 _1) { return __builtin_lasx_xvrepl128vei_h(_1, 1); } ++// CHECK-LABEL: @xvrepl128vei_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvrepl128vei_w(v8i32 _1) { return __builtin_lasx_xvrepl128vei_w(_1, 1); } ++// CHECK-LABEL: @xvrepl128vei_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvrepl128vei_d(v4i64 _1) { return __builtin_lasx_xvrepl128vei_d(_1, 1); } ++// CHECK-LABEL: @xvpickev_b( ++// CHECK-NEXT: entry: ++// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvpickev_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpickev_b(_1, _2); } ++// CHECK-LABEL: @xvpickev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvpickev_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpickev_h(_1, _2); } ++// CHECK-LABEL: @xvpickev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvpickev_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpickev_w(_1, _2); } ++// CHECK-LABEL: @xvpickev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvpickev_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpickev_d(_1, _2); } ++// CHECK-LABEL: @xvpickod_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvpickod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpickod_b(_1, _2); } ++// CHECK-LABEL: @xvpickod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvpickod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpickod_h(_1, _2); } ++// CHECK-LABEL: @xvpickod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvpickod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpickod_w(_1, _2); } ++// CHECK-LABEL: @xvpickod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvpickod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpickod_d(_1, _2); } ++// CHECK-LABEL: @xvilvh_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvilvh_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvilvh_b(_1, _2); } ++// CHECK-LABEL: @xvilvh_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvilvh_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvilvh_h(_1, _2); } ++// CHECK-LABEL: @xvilvh_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvilvh_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvilvh_w(_1, _2); } ++// CHECK-LABEL: @xvilvh_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> [[_1:%.*]], <4 x i64> 
[[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvilvh_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvilvh_d(_1, _2); } ++// CHECK-LABEL: @xvilvl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvilvl_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvilvl_b(_1, _2); } ++// CHECK-LABEL: @xvilvl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvilvl_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvilvl_h(_1, _2); } ++// CHECK-LABEL: @xvilvl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvilvl_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvilvl_w(_1, _2); } ++// CHECK-LABEL: @xvilvl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvilvl_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvilvl_d(_1, _2); } ++// CHECK-LABEL: @xvpackev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvpackev_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpackev_b(_1, _2); } ++// CHECK-LABEL: @xvpackev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvpackev_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpackev_h(_1, _2); } ++// CHECK-LABEL: @xvpackev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvpackev_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpackev_w(_1, _2); } ++// CHECK-LABEL: @xvpackev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvpackev_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpackev_d(_1, _2); } ++// CHECK-LABEL: @xvpackod_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvpackod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpackod_b(_1, _2); } ++// CHECK-LABEL: @xvpackod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvpackod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpackod_h(_1, _2); } ++// CHECK-LABEL: @xvpackod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvpackod_w(v8i32 _1, v8i32 _2) { return 
__builtin_lasx_xvpackod_w(_1, _2); } ++// CHECK-LABEL: @xvpackod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvpackod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpackod_d(_1, _2); } ++// CHECK-LABEL: @xvshuf_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvshuf_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvshuf_b(_1, _2, _3); } ++// CHECK-LABEL: @xvshuf_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvshuf_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvshuf_h(_1, _2, _3); } ++// CHECK-LABEL: @xvshuf_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvshuf_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvshuf_w(_1, _2, _3); } ++// CHECK-LABEL: @xvshuf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvshuf_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvshuf_d(_1, _2, _3); } ++// CHECK-LABEL: @xvand_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvand_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvand_v(_1, _2); } ++// CHECK-LABEL: @xvandi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvandi_b(v32u8 _1) { return __builtin_lasx_xvandi_b(_1, 1); } ++// CHECK-LABEL: @xvor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvor_v(_1, _2); } ++// CHECK-LABEL: @xvori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvori_b(v32u8 _1) { return __builtin_lasx_xvori_b(_1, 1); } ++// CHECK-LABEL: @xvnor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvnor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvnor_v(_1, _2); } ++// CHECK-LABEL: @xvnori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvnori_b(v32u8 _1) { return __builtin_lasx_xvnori_b(_1, 1); } ++// CHECK-LABEL: @xvxor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x 
i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvxor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvxor_v(_1, _2); } ++// CHECK-LABEL: @xvxori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvxori_b(v32u8 _1) { return __builtin_lasx_xvxori_b(_1, 1); } ++// CHECK-LABEL: @xvbitsel_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvbitsel_v(v32u8 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvbitsel_v(_1, _2, _3); } ++// CHECK-LABEL: @xvbitseli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvbitseli_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitseli_b(_1, _2, 1); } ++// CHECK-LABEL: @xvshuf4i_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvshuf4i_b(v32i8 _1) { return __builtin_lasx_xvshuf4i_b(_1, 1); } ++// CHECK-LABEL: @xvshuf4i_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvshuf4i_h(v16i16 _1) { return __builtin_lasx_xvshuf4i_h(_1, 1); } ++// CHECK-LABEL: @xvshuf4i_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvshuf4i_w(v8i32 _1) { return __builtin_lasx_xvshuf4i_w(_1, 1); } ++// CHECK-LABEL: @xvreplgr2vr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvreplgr2vr_b(int _1) { return __builtin_lasx_xvreplgr2vr_b(_1); } ++// CHECK-LABEL: @xvreplgr2vr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvreplgr2vr_h(int _1) { return __builtin_lasx_xvreplgr2vr_h(_1); } ++// CHECK-LABEL: @xvreplgr2vr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvreplgr2vr_w(int _1) { return __builtin_lasx_xvreplgr2vr_w(_1); } ++// CHECK-LABEL: @xvreplgr2vr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[_1:%.*]] to i64 ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 [[CONV]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvreplgr2vr_d(int _1) { return __builtin_lasx_xvreplgr2vr_d(_1); } ++// CHECK-LABEL: @xvpcnt_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvpcnt_b(v32i8 _1) { return __builtin_lasx_xvpcnt_b(_1); } ++// CHECK-LABEL: @xvpcnt_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvpcnt_h(v16i16 _1) { return __builtin_lasx_xvpcnt_h(_1); } ++// CHECK-LABEL: @xvpcnt_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvpcnt_w(v8i32 _1) { return __builtin_lasx_xvpcnt_w(_1); } ++// CHECK-LABEL: @xvpcnt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvpcnt_d(v4i64 _1) { return __builtin_lasx_xvpcnt_d(_1); } ++// CHECK-LABEL: @xvclo_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvclo_b(v32i8 _1) { return __builtin_lasx_xvclo_b(_1); } ++// CHECK-LABEL: @xvclo_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvclo_h(v16i16 _1) { return __builtin_lasx_xvclo_h(_1); } ++// CHECK-LABEL: @xvclo_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvclo_w(v8i32 _1) { return __builtin_lasx_xvclo_w(_1); } ++// CHECK-LABEL: @xvclo_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvclo_d(v4i64 _1) { return __builtin_lasx_xvclo_d(_1); } ++// CHECK-LABEL: @xvclz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvclz_b(v32i8 _1) { return __builtin_lasx_xvclz_b(_1); } ++// CHECK-LABEL: @xvclz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvclz_h(v16i16 _1) { return __builtin_lasx_xvclz_h(_1); } ++// CHECK-LABEL: @xvclz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvclz_w(v8i32 _1) { return __builtin_lasx_xvclz_w(_1); } ++// CHECK-LABEL: @xvclz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvclz_d(v4i64 _1) { return __builtin_lasx_xvclz_d(_1); } ++// CHECK-LABEL: @xvfadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfadd_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfadd_s(_1, _2); } ++// CHECK-LABEL: @xvfadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfadd_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfadd_d(_1, _2); } ++// CHECK-LABEL: @xvfsub_s( ++// CHECK-NEXT: entry: 
++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfsub_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfsub_s(_1, _2); } ++// CHECK-LABEL: @xvfsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfsub_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfsub_d(_1, _2); } ++// CHECK-LABEL: @xvfmul_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfmul_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmul_s(_1, _2); } ++// CHECK-LABEL: @xvfmul_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfmul_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmul_d(_1, _2); } ++// CHECK-LABEL: @xvfdiv_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfdiv_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfdiv_s(_1, _2); } ++// CHECK-LABEL: @xvfdiv_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfdiv_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfdiv_d(_1, _2); } ++// CHECK-LABEL: @xvfcvt_h_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvfcvt_h_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcvt_h_s(_1, _2); } ++// CHECK-LABEL: @xvfcvt_s_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfcvt_s_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcvt_s_d(_1, _2); } ++// CHECK-LABEL: @xvfmin_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfmin_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmin_s(_1, _2); } ++// CHECK-LABEL: @xvfmin_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfmin_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmin_d(_1, _2); } ++// CHECK-LABEL: @xvfmina_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfmina_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmina_s(_1, _2); } ++// CHECK-LABEL: @xvfmina_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> 
@llvm.loongarch.lasx.xvfmina.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfmina_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmina_d(_1, _2); } ++// CHECK-LABEL: @xvfmax_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfmax_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmax_s(_1, _2); } ++// CHECK-LABEL: @xvfmax_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfmax_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmax_d(_1, _2); } ++// CHECK-LABEL: @xvfmaxa_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfmaxa_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmaxa_s(_1, _2); } ++// CHECK-LABEL: @xvfmaxa_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfmaxa_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmaxa_d(_1, _2); } ++// CHECK-LABEL: @xvfclass_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfclass_s(v8f32 _1) { return __builtin_lasx_xvfclass_s(_1); } ++// CHECK-LABEL: @xvfclass_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfclass_d(v4f64 _1) { return __builtin_lasx_xvfclass_d(_1); } ++// CHECK-LABEL: @xvfsqrt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfsqrt_s(v8f32 _1) { return __builtin_lasx_xvfsqrt_s(_1); } ++// CHECK-LABEL: @xvfsqrt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfsqrt_d(v4f64 _1) { return __builtin_lasx_xvfsqrt_d(_1); } ++// CHECK-LABEL: @xvfrecip_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfrecip_s(v8f32 _1) { return __builtin_lasx_xvfrecip_s(_1); } ++// CHECK-LABEL: @xvfrecip_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfrecip_d(v4f64 _1) { return __builtin_lasx_xvfrecip_d(_1); } ++// CHECK-LABEL: @xvfrint_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfrint_s(v8f32 _1) { return __builtin_lasx_xvfrint_s(_1); } ++// CHECK-LABEL: @xvfrint_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x 
double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfrint_d(v4f64 _1) { return __builtin_lasx_xvfrint_d(_1); } ++// CHECK-LABEL: @xvfrsqrt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfrsqrt_s(v8f32 _1) { return __builtin_lasx_xvfrsqrt_s(_1); } ++// CHECK-LABEL: @xvfrsqrt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfrsqrt_d(v4f64 _1) { return __builtin_lasx_xvfrsqrt_d(_1); } ++// CHECK-LABEL: @xvflogb_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvflogb_s(v8f32 _1) { return __builtin_lasx_xvflogb_s(_1); } ++// CHECK-LABEL: @xvflogb_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvflogb_d(v4f64 _1) { return __builtin_lasx_xvflogb_d(_1); } ++// CHECK-LABEL: @xvfcvth_s_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfcvth_s_h(v16i16 _1) { return __builtin_lasx_xvfcvth_s_h(_1); } ++// CHECK-LABEL: @xvfcvth_d_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfcvth_d_s(v8f32 _1) { return __builtin_lasx_xvfcvth_d_s(_1); } ++// CHECK-LABEL: @xvfcvtl_s_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfcvtl_s_h(v16i16 _1) { return __builtin_lasx_xvfcvtl_s_h(_1); } ++// CHECK-LABEL: @xvfcvtl_d_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfcvtl_d_s(v8f32 _1) { return __builtin_lasx_xvfcvtl_d_s(_1); } ++// CHECK-LABEL: @xvftint_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftint_w_s(v8f32 _1) { return __builtin_lasx_xvftint_w_s(_1); } ++// CHECK-LABEL: @xvftint_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftint_l_d(v4f64 _1) { return __builtin_lasx_xvftint_l_d(_1); } ++// CHECK-LABEL: @xvftint_wu_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvftint_wu_s(v8f32 _1) { return __builtin_lasx_xvftint_wu_s(_1); } ++// CHECK-LABEL: @xvftint_lu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 
xvftint_lu_d(v4f64 _1) { return __builtin_lasx_xvftint_lu_d(_1); } ++// CHECK-LABEL: @xvftintrz_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftintrz_w_s(v8f32 _1) { return __builtin_lasx_xvftintrz_w_s(_1); } ++// CHECK-LABEL: @xvftintrz_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrz_l_d(v4f64 _1) { return __builtin_lasx_xvftintrz_l_d(_1); } ++// CHECK-LABEL: @xvftintrz_wu_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvftintrz_wu_s(v8f32 _1) { return __builtin_lasx_xvftintrz_wu_s(_1); } ++// CHECK-LABEL: @xvftintrz_lu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvftintrz_lu_d(v4f64 _1) { return __builtin_lasx_xvftintrz_lu_d(_1); } ++// CHECK-LABEL: @xvffint_s_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvffint_s_w(v8i32 _1) { return __builtin_lasx_xvffint_s_w(_1); } ++// CHECK-LABEL: @xvffint_d_l( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvffint_d_l(v4i64 _1) { return __builtin_lasx_xvffint_d_l(_1); } ++// CHECK-LABEL: @xvffint_s_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvffint_s_wu(v8u32 _1) { return __builtin_lasx_xvffint_s_wu(_1); } ++// CHECK-LABEL: @xvffint_d_lu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvffint_d_lu(v4u64 _1) { return __builtin_lasx_xvffint_d_lu(_1); } ++// CHECK-LABEL: @xvreplve_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_1:%.*]], i32 [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvreplve_b(v32i8 _1, int _2) { return __builtin_lasx_xvreplve_b(_1, _2); } ++// CHECK-LABEL: @xvreplve_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_1:%.*]], i32 [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvreplve_h(v16i16 _1, int _2) { return __builtin_lasx_xvreplve_h(_1, _2); } ++// CHECK-LABEL: @xvreplve_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_1:%.*]], i32 [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvreplve_w(v8i32 _1, int _2) { return __builtin_lasx_xvreplve_w(_1, _2); } ++// CHECK-LABEL: @xvreplve_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1:%.*]], i32 [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 
xvreplve_d(v4i64 _1, int _2) { return __builtin_lasx_xvreplve_d(_1, _2); } ++// CHECK-LABEL: @xvpermi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvpermi_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpermi_w(_1, _2, 1); } ++// CHECK-LABEL: @xvandn_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvandn_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvandn_v(_1, _2); } ++// CHECK-LABEL: @xvneg_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvneg_b(v32i8 _1) { return __builtin_lasx_xvneg_b(_1); } ++// CHECK-LABEL: @xvneg_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvneg_h(v16i16 _1) { return __builtin_lasx_xvneg_h(_1); } ++// CHECK-LABEL: @xvneg_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvneg_w(v8i32 _1) { return __builtin_lasx_xvneg_w(_1); } ++// CHECK-LABEL: @xvneg_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvneg_d(v4i64 _1) { return __builtin_lasx_xvneg_d(_1); } ++// CHECK-LABEL: @xvmuh_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmuh_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmuh_b(_1, _2); } ++// CHECK-LABEL: @xvmuh_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmuh_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmuh_h(_1, _2); } ++// CHECK-LABEL: @xvmuh_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmuh_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmuh_w(_1, _2); } ++// CHECK-LABEL: @xvmuh_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmuh_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmuh_d(_1, _2); } ++// CHECK-LABEL: @xvmuh_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvmuh_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmuh_bu(_1, _2); } ++// CHECK-LABEL: @xvmuh_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvmuh_hu(v16u16 _1, v16u16 _2) { 
return __builtin_lasx_xvmuh_hu(_1, _2); } ++// CHECK-LABEL: @xvmuh_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvmuh_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmuh_wu(_1, _2); } ++// CHECK-LABEL: @xvmuh_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmuh_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmuh_du(_1, _2); } ++// CHECK-LABEL: @xvsllwil_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsllwil_h_b(v32i8 _1) { return __builtin_lasx_xvsllwil_h_b(_1, 1); } ++// CHECK-LABEL: @xvsllwil_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsllwil_w_h(v16i16 _1) { return __builtin_lasx_xvsllwil_w_h(_1, 1); } ++// CHECK-LABEL: @xvsllwil_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsllwil_d_w(v8i32 _1) { return __builtin_lasx_xvsllwil_d_w(_1, 1); } ++// CHECK-LABEL: @xvsllwil_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvsllwil_hu_bu(v32u8 _1) { return __builtin_lasx_xvsllwil_hu_bu(_1, 1); } ++// CHECK-LABEL: @xvsllwil_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvsllwil_wu_hu(v16u16 _1) { return __builtin_lasx_xvsllwil_wu_hu(_1, 1); } ++// CHECK-LABEL: @xvsllwil_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvsllwil_du_wu(v8u32 _1) { return __builtin_lasx_xvsllwil_du_wu(_1, 1); } ++// CHECK-LABEL: @xvsran_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsran_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsran_b_h(_1, _2); } ++// CHECK-LABEL: @xvsran_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsran_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsran_h_w(_1, _2); } ++// CHECK-LABEL: @xvsran_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsran_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsran_w_d(_1, _2); } ++// CHECK-LABEL: @xvssran_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> 
[[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssran_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssran_b_h(_1, _2); } ++// CHECK-LABEL: @xvssran_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssran_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssran_h_w(_1, _2); } ++// CHECK-LABEL: @xvssran_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssran_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssran_w_d(_1, _2); } ++// CHECK-LABEL: @xvssran_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssran_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssran_bu_h(_1, _2); } ++// CHECK-LABEL: @xvssran_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssran_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssran_hu_w(_1, _2); } ++// CHECK-LABEL: @xvssran_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssran_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssran_wu_d(_1, _2); } ++// CHECK-LABEL: @xvsrarn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrarn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrarn_b_h(_1, _2); } ++// CHECK-LABEL: @xvsrarn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrarn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrarn_h_w(_1, _2); } ++// CHECK-LABEL: @xvsrarn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrarn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrarn_w_d(_1, _2); } ++// CHECK-LABEL: @xvssrarn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssrarn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrarn_b_h(_1, _2); } ++// CHECK-LABEL: @xvssrarn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssrarn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrarn_h_w(_1, _2); } ++// CHECK-LABEL: @xvssrarn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> [[_1:%.*]], <4 x 
i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssrarn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrarn_w_d(_1, _2); } ++// CHECK-LABEL: @xvssrarn_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssrarn_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrarn_bu_h(_1, _2); } ++// CHECK-LABEL: @xvssrarn_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssrarn_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrarn_hu_w(_1, _2); } ++// CHECK-LABEL: @xvssrarn_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssrarn_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrarn_wu_d(_1, _2); } ++// CHECK-LABEL: @xvsrln_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrln_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrln_b_h(_1, _2); } ++// CHECK-LABEL: @xvsrln_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrln_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrln_h_w(_1, _2); } ++// CHECK-LABEL: @xvsrln_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrln_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrln_w_d(_1, _2); } ++// CHECK-LABEL: @xvssrln_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssrln_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrln_bu_h(_1, _2); } ++// CHECK-LABEL: @xvssrln_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssrln_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrln_hu_w(_1, _2); } ++// CHECK-LABEL: @xvssrln_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssrln_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrln_wu_d(_1, _2); } ++// CHECK-LABEL: @xvsrlrn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrlrn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlrn_b_h(_1, _2); } ++// CHECK-LABEL: @xvsrlrn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> 
[[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrlrn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlrn_h_w(_1, _2); } ++// CHECK-LABEL: @xvsrlrn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrlrn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlrn_w_d(_1, _2); } ++// CHECK-LABEL: @xvssrlrn_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssrlrn_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrlrn_bu_h(_1, _2); } ++// CHECK-LABEL: @xvssrlrn_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssrlrn_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrlrn_hu_w(_1, _2); } ++// CHECK-LABEL: @xvssrlrn_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssrlrn_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrlrn_wu_d(_1, _2); } ++// CHECK-LABEL: @xvfrstpi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvfrstpi_b(_1, _2, 1); } ++// CHECK-LABEL: @xvfrstpi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvfrstpi_h(_1, _2, 1); } ++// CHECK-LABEL: @xvfrstp_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvfrstp_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvfrstp_b(_1, _2, _3); } ++// CHECK-LABEL: @xvfrstp_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvfrstp_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvfrstp_h(_1, _2, _3); } ++// CHECK-LABEL: @xvshuf4i_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvshuf4i_d(_1, _2, 1); } ++// CHECK-LABEL: @xvbsrl_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvbsrl_v(v32i8 _1) { return __builtin_lasx_xvbsrl_v(_1, 1); } ++// CHECK-LABEL: @xvbsll_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> 
[[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvbsll_v(v32i8 _1) { return __builtin_lasx_xvbsll_v(_1, 1); } ++// CHECK-LABEL: @xvextrins_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvextrins_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvextrins_b(_1, _2, 1); } ++// CHECK-LABEL: @xvextrins_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvextrins_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvextrins_h(_1, _2, 1); } ++// CHECK-LABEL: @xvextrins_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvextrins_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvextrins_w(_1, _2, 1); } ++// CHECK-LABEL: @xvextrins_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvextrins_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvextrins_d(_1, _2, 1); } ++// CHECK-LABEL: @xvmskltz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmskltz_b(v32i8 _1) { return __builtin_lasx_xvmskltz_b(_1); } ++// CHECK-LABEL: @xvmskltz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmskltz_h(v16i16 _1) { return __builtin_lasx_xvmskltz_h(_1); } ++// CHECK-LABEL: @xvmskltz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmskltz_w(v8i32 _1) { return __builtin_lasx_xvmskltz_w(_1); } ++// CHECK-LABEL: @xvmskltz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmskltz_d(v4i64 _1) { return __builtin_lasx_xvmskltz_d(_1); } ++// CHECK-LABEL: @xvsigncov_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsigncov_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsigncov_b(_1, _2); } ++// CHECK-LABEL: @xvsigncov_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsigncov_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsigncov_h(_1, _2); } ++// CHECK-LABEL: @xvsigncov_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsigncov_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsigncov_w(_1, _2); } ++// CHECK-LABEL: @xvsigncov_d( 
++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsigncov_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsigncov_d(_1, _2); } ++// CHECK-LABEL: @xvfmadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfmadd_s(_1, _2, _3); } ++// CHECK-LABEL: @xvfmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfmadd_d(_1, _2, _3); } ++// CHECK-LABEL: @xvfmsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfmsub_s(_1, _2, _3); } ++// CHECK-LABEL: @xvfmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfmsub_d(_1, _2, _3); } ++// CHECK-LABEL: @xvfnmadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfnmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfnmadd_s(_1, _2, _3); } ++// CHECK-LABEL: @xvfnmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfnmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfnmadd_d(_1, _2, _3); } ++// CHECK-LABEL: @xvfnmsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfnmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfnmsub_s(_1, _2, _3); } ++// CHECK-LABEL: @xvfnmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfnmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfnmsub_d(_1, _2, _3); } ++// CHECK-LABEL: @xvftintrne_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftintrne_w_s(v8f32 _1) { return __builtin_lasx_xvftintrne_w_s(_1); } ++// CHECK-LABEL: @xvftintrne_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail 
call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrne_l_d(v4f64 _1) { return __builtin_lasx_xvftintrne_l_d(_1); } ++// CHECK-LABEL: @xvftintrp_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftintrp_w_s(v8f32 _1) { return __builtin_lasx_xvftintrp_w_s(_1); } ++// CHECK-LABEL: @xvftintrp_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrp_l_d(v4f64 _1) { return __builtin_lasx_xvftintrp_l_d(_1); } ++// CHECK-LABEL: @xvftintrm_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftintrm_w_s(v8f32 _1) { return __builtin_lasx_xvftintrm_w_s(_1); } ++// CHECK-LABEL: @xvftintrm_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrm_l_d(v4f64 _1) { return __builtin_lasx_xvftintrm_l_d(_1); } ++// CHECK-LABEL: @xvftint_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftint_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftint_w_d(_1, _2); } ++// CHECK-LABEL: @xvffint_s_l( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvffint_s_l(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvffint_s_l(_1, _2); } ++// CHECK-LABEL: @xvftintrz_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftintrz_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrz_w_d(_1, _2); } ++// CHECK-LABEL: @xvftintrp_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftintrp_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrp_w_d(_1, _2); } ++// CHECK-LABEL: @xvftintrm_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftintrm_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrm_w_d(_1, _2); } ++// CHECK-LABEL: @xvftintrne_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftintrne_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrne_w_d(_1, _2); } ++// CHECK-LABEL: @xvftinth_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// 
++v4i64 xvftinth_l_s(v8f32 _1) { return __builtin_lasx_xvftinth_l_s(_1); } ++// CHECK-LABEL: @xvftintl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintl_l_s(v8f32 _1) { return __builtin_lasx_xvftintl_l_s(_1); } ++// CHECK-LABEL: @xvffinth_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvffinth_d_w(v8i32 _1) { return __builtin_lasx_xvffinth_d_w(_1); } ++// CHECK-LABEL: @xvffintl_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvffintl_d_w(v8i32 _1) { return __builtin_lasx_xvffintl_d_w(_1); } ++// CHECK-LABEL: @xvftintrzh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrzh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrzh_l_s(_1); } ++// CHECK-LABEL: @xvftintrzl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrzl_l_s(v8f32 _1) { return __builtin_lasx_xvftintrzl_l_s(_1); } ++// CHECK-LABEL: @xvftintrph_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrph_l_s(v8f32 _1) { return __builtin_lasx_xvftintrph_l_s(_1); } ++// CHECK-LABEL: @xvftintrpl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrpl_l_s(v8f32 _1) { return __builtin_lasx_xvftintrpl_l_s(_1); } ++// CHECK-LABEL: @xvftintrmh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrmh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrmh_l_s(_1); } ++// CHECK-LABEL: @xvftintrml_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrml_l_s(v8f32 _1) { return __builtin_lasx_xvftintrml_l_s(_1); } ++// CHECK-LABEL: @xvftintrneh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrneh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrneh_l_s(_1); } ++// CHECK-LABEL: @xvftintrnel_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrnel_l_s(v8f32 _1) { return __builtin_lasx_xvftintrnel_l_s(_1); } ++// CHECK-LABEL: @xvfrintrne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> 
++// CHECK-NEXT: ret <8 x i32> [[TMP1]] ++// ++v8i32 xvfrintrne_s(v8f32 _1) { return __builtin_lasx_xvfrintrne_s(_1); } ++// CHECK-LABEL: @xvfrintrne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> ++// CHECK-NEXT: ret <4 x i64> [[TMP1]] ++// ++v4i64 xvfrintrne_d(v4f64 _1) { return __builtin_lasx_xvfrintrne_d(_1); } ++// CHECK-LABEL: @xvfrintrz_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> ++// CHECK-NEXT: ret <8 x i32> [[TMP1]] ++// ++v8i32 xvfrintrz_s(v8f32 _1) { return __builtin_lasx_xvfrintrz_s(_1); } ++// CHECK-LABEL: @xvfrintrz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> ++// CHECK-NEXT: ret <4 x i64> [[TMP1]] ++// ++v4i64 xvfrintrz_d(v4f64 _1) { return __builtin_lasx_xvfrintrz_d(_1); } ++// CHECK-LABEL: @xvfrintrp_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> ++// CHECK-NEXT: ret <8 x i32> [[TMP1]] ++// ++v8i32 xvfrintrp_s(v8f32 _1) { return __builtin_lasx_xvfrintrp_s(_1); } ++// CHECK-LABEL: @xvfrintrp_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> ++// CHECK-NEXT: ret <4 x i64> [[TMP1]] ++// ++v4i64 xvfrintrp_d(v4f64 _1) { return __builtin_lasx_xvfrintrp_d(_1); } ++// CHECK-LABEL: @xvfrintrm_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> ++// CHECK-NEXT: ret <8 x i32> [[TMP1]] ++// ++v8i32 xvfrintrm_s(v8f32 _1) { return __builtin_lasx_xvfrintrm_s(_1); } ++// CHECK-LABEL: @xvfrintrm_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> ++// CHECK-NEXT: ret <4 x i64> [[TMP1]] ++// ++v4i64 xvfrintrm_d(v4f64 _1) { return __builtin_lasx_xvfrintrm_d(_1); } ++// CHECK-LABEL: @xvld( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvld(void *_1) { return __builtin_lasx_xvld(_1, 1); } ++// CHECK-LABEL: @xvst( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret void ++// ++void xvst(v32i8 _1, void *_2) { return __builtin_lasx_xvst(_1, _2, 1); } ++// CHECK-LABEL: @xvstelm_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1, i32 1) ++// CHECK-NEXT: ret void ++// ++void xvstelm_b(v32i8 _1, void * _2) { return __builtin_lasx_xvstelm_b(_1, _2, 1, 1); } ++// CHECK-LABEL: @xvstelm_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void 
@llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1:%.*]], ptr [[_2:%.*]], i32 2, i32 1) ++// CHECK-NEXT: ret void ++// ++void xvstelm_h(v16i16 _1, void * _2) { return __builtin_lasx_xvstelm_h(_1, _2, 2, 1); } ++// CHECK-LABEL: @xvstelm_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1:%.*]], ptr [[_2:%.*]], i32 4, i32 1) ++// CHECK-NEXT: ret void ++// ++void xvstelm_w(v8i32 _1, void * _2) { return __builtin_lasx_xvstelm_w(_1, _2, 4, 1); } ++// CHECK-LABEL: @xvstelm_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1:%.*]], ptr [[_2:%.*]], i32 8, i32 1) ++// CHECK-NEXT: ret void ++// ++void xvstelm_d(v4i64 _1, void * _2) { return __builtin_lasx_xvstelm_d(_1, _2, 8, 1); } ++// CHECK-LABEL: @xvinsve0_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvinsve0_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvinsve0_w(_1, _2, 1); } ++// CHECK-LABEL: @xvinsve0_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvinsve0_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvinsve0_d(_1, _2, 1); } ++// CHECK-LABEL: @xvpickve_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvpickve_w(v8i32 _1) { return __builtin_lasx_xvpickve_w(_1, 1); } ++// CHECK-LABEL: @xvpickve_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvpickve_d(v4i64 _1) { return __builtin_lasx_xvpickve_d(_1, 1); } ++// CHECK-LABEL: @xvssrlrn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssrlrn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrn_b_h(_1, _2); } ++// CHECK-LABEL: @xvssrlrn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssrlrn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrn_h_w(_1, _2); } ++// CHECK-LABEL: @xvssrlrn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssrlrn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrn_w_d(_1, _2); } ++// CHECK-LABEL: @xvssrln_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssrln_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrln_b_h(_1, _2); } ++// CHECK-LABEL: @xvssrln_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssrln_h_w(v8i32 
_1, v8i32 _2) { return __builtin_lasx_xvssrln_h_w(_1, _2); } ++// CHECK-LABEL: @xvssrln_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssrln_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrln_w_d(_1, _2); } ++// CHECK-LABEL: @xvorn_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvorn_v(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvorn_v(_1, _2); } ++// CHECK-LABEL: @xvldi( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvldi() { return __builtin_lasx_xvldi(1); } ++// CHECK-LABEL: @xvldx( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1:%.*]], i64 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvldx(void *_1) { return __builtin_lasx_xvldx(_1, 1); } ++// CHECK-LABEL: @xvstx( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i64 1) ++// CHECK-NEXT: ret void ++// ++void xvstx(v32i8 _1, void *_2) { return __builtin_lasx_xvstx(_1, _2, 1); } ++// CHECK-LABEL: @xvextl_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvextl_qu_du(v4u64 _1) { return __builtin_lasx_xvextl_qu_du(_1); } ++// CHECK-LABEL: @xvinsgr2vr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> [[_1:%.*]], i32 1, i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvinsgr2vr_w(v8i32 _1) { return __builtin_lasx_xvinsgr2vr_w(_1, 1, 1); } ++// CHECK-LABEL: @xvinsgr2vr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> [[_1:%.*]], i64 1, i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvinsgr2vr_d(v4i64 _1) { return __builtin_lasx_xvinsgr2vr_d(_1, 1, 1); } ++// CHECK-LABEL: @xvreplve0_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvreplve0_b(v32i8 _1) { return __builtin_lasx_xvreplve0_b(_1); } ++// CHECK-LABEL: @xvreplve0_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvreplve0_h(v16i16 _1) { return __builtin_lasx_xvreplve0_h(_1); } ++// CHECK-LABEL: @xvreplve0_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvreplve0_w(v8i32 _1) { return __builtin_lasx_xvreplve0_w(_1); } ++// CHECK-LABEL: @xvreplve0_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvreplve0_d(v4i64 _1) { return __builtin_lasx_xvreplve0_d(_1); } ++// CHECK-LABEL: @xvreplve0_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail 
call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvreplve0_q(v32i8 _1) { return __builtin_lasx_xvreplve0_q(_1); } ++// CHECK-LABEL: @vext2xv_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 vext2xv_h_b(v32i8 _1) { return __builtin_lasx_vext2xv_h_b(_1); } ++// CHECK-LABEL: @vext2xv_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 vext2xv_w_h(v16i16 _1) { return __builtin_lasx_vext2xv_w_h(_1); } ++// CHECK-LABEL: @vext2xv_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 vext2xv_d_w(v8i32 _1) { return __builtin_lasx_vext2xv_d_w(_1); } ++// CHECK-LABEL: @vext2xv_w_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 vext2xv_w_b(v32i8 _1) { return __builtin_lasx_vext2xv_w_b(_1); } ++// CHECK-LABEL: @vext2xv_d_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 vext2xv_d_h(v16i16 _1) { return __builtin_lasx_vext2xv_d_h(_1); } ++// CHECK-LABEL: @vext2xv_d_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 vext2xv_d_b(v32i8 _1) { return __builtin_lasx_vext2xv_d_b(_1); } ++// CHECK-LABEL: @vext2xv_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 vext2xv_hu_bu(v32i8 _1) { return __builtin_lasx_vext2xv_hu_bu(_1); } ++// CHECK-LABEL: @vext2xv_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 vext2xv_wu_hu(v16i16 _1) { return __builtin_lasx_vext2xv_wu_hu(_1); } ++// CHECK-LABEL: @vext2xv_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 vext2xv_du_wu(v8i32 _1) { return __builtin_lasx_vext2xv_du_wu(_1); } ++// CHECK-LABEL: @vext2xv_wu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 vext2xv_wu_bu(v32i8 _1) { return __builtin_lasx_vext2xv_wu_bu(_1); } ++// CHECK-LABEL: @vext2xv_du_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 vext2xv_du_hu(v16i16 _1) { return __builtin_lasx_vext2xv_du_hu(_1); } ++// CHECK-LABEL: @vext2xv_du_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 
vext2xv_du_bu(v32i8 _1) { return __builtin_lasx_vext2xv_du_bu(_1); } ++// CHECK-LABEL: @xvpermi_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvpermi_q(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpermi_q(_1, _2, 1); } ++// CHECK-LABEL: @xvpermi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvpermi_d(v4i64 _1) { return __builtin_lasx_xvpermi_d(_1, 1); } ++// CHECK-LABEL: @xvperm_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvperm_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvperm_w(_1, _2); } ++// CHECK-LABEL: @xvldrepl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvldrepl_b(void *_1) { return __builtin_lasx_xvldrepl_b(_1, 1); } ++// CHECK-LABEL: @xvldrepl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr [[_1:%.*]], i32 2) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvldrepl_h(void *_1) { return __builtin_lasx_xvldrepl_h(_1, 2); } ++// CHECK-LABEL: @xvldrepl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr [[_1:%.*]], i32 4) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvldrepl_w(void *_1) { return __builtin_lasx_xvldrepl_w(_1, 4); } ++// CHECK-LABEL: @xvldrepl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr [[_1:%.*]], i32 8) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvldrepl_d(void *_1) { return __builtin_lasx_xvldrepl_d(_1, 8); } ++// CHECK-LABEL: @xvpickve2gr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xvpickve2gr_w(v8i32 _1) { return __builtin_lasx_xvpickve2gr_w(_1, 1); } ++// CHECK-LABEL: @xvpickve2gr_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++unsigned int xvpickve2gr_wu(v8i32 _1) { return __builtin_lasx_xvpickve2gr_wu(_1, 1); } ++// CHECK-LABEL: @xvpickve2gr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i64 [[TMP0]] ++// ++long xvpickve2gr_d(v4i64 _1) { return __builtin_lasx_xvpickve2gr_d(_1, 1); } ++// CHECK-LABEL: @xvpickve2gr_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i64 [[TMP0]] ++// ++unsigned long int xvpickve2gr_du(v4i64 _1) { return __builtin_lasx_xvpickve2gr_du(_1, 1); } ++// CHECK-LABEL: @xvaddwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwev_q_d(v4i64 _1, v4i64 _2) { 
return __builtin_lasx_xvaddwev_q_d(_1, _2); } ++// CHECK-LABEL: @xvaddwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvaddwev_d_w(_1, _2); } ++// CHECK-LABEL: @xvaddwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvaddwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvaddwev_w_h(_1, _2); } ++// CHECK-LABEL: @xvaddwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvaddwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvaddwev_h_b(_1, _2); } ++// CHECK-LABEL: @xvaddwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvaddwev_q_du(_1, _2); } ++// CHECK-LABEL: @xvaddwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvaddwev_d_wu(_1, _2); } ++// CHECK-LABEL: @xvaddwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvaddwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvaddwev_w_hu(_1, _2); } ++// CHECK-LABEL: @xvaddwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvaddwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvaddwev_h_bu(_1, _2); } ++// CHECK-LABEL: @xvsubwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsubwev_q_d(_1, _2); } ++// CHECK-LABEL: @xvsubwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsubwev_d_w(_1, _2); } ++// CHECK-LABEL: @xvsubwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsubwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsubwev_w_h(_1, _2); } ++// CHECK-LABEL: @xvsubwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsubwev_h_b(v32i8 
_1, v32i8 _2) { return __builtin_lasx_xvsubwev_h_b(_1, _2); } ++// CHECK-LABEL: @xvsubwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsubwev_q_du(_1, _2); } ++// CHECK-LABEL: @xvsubwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsubwev_d_wu(_1, _2); } ++// CHECK-LABEL: @xvsubwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsubwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsubwev_w_hu(_1, _2); } ++// CHECK-LABEL: @xvsubwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsubwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsubwev_h_bu(_1, _2); } ++// CHECK-LABEL: @xvmulwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmulwev_q_d(_1, _2); } ++// CHECK-LABEL: @xvmulwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmulwev_d_w(_1, _2); } ++// CHECK-LABEL: @xvmulwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmulwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmulwev_w_h(_1, _2); } ++// CHECK-LABEL: @xvmulwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmulwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmulwev_h_b(_1, _2); } ++// CHECK-LABEL: @xvmulwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmulwev_q_du(_1, _2); } ++// CHECK-LABEL: @xvmulwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmulwev_d_wu(_1, _2); } ++// CHECK-LABEL: @xvmulwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 
xvmulwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmulwev_w_hu(_1, _2); } ++// CHECK-LABEL: @xvmulwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmulwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmulwev_h_bu(_1, _2); } ++// CHECK-LABEL: @xvaddwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvaddwod_q_d(_1, _2); } ++// CHECK-LABEL: @xvaddwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvaddwod_d_w(_1, _2); } ++// CHECK-LABEL: @xvaddwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvaddwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvaddwod_w_h(_1, _2); } ++// CHECK-LABEL: @xvaddwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvaddwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvaddwod_h_b(_1, _2); } ++// CHECK-LABEL: @xvaddwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvaddwod_q_du(_1, _2); } ++// CHECK-LABEL: @xvaddwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvaddwod_d_wu(_1, _2); } ++// CHECK-LABEL: @xvaddwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvaddwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvaddwod_w_hu(_1, _2); } ++// CHECK-LABEL: @xvaddwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvaddwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvaddwod_h_bu(_1, _2); } ++// CHECK-LABEL: @xvsubwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsubwod_q_d(_1, _2); } ++// CHECK-LABEL: @xvsubwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> 
[[TMP0]] ++// ++v4i64 xvsubwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsubwod_d_w(_1, _2); } ++// CHECK-LABEL: @xvsubwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsubwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsubwod_w_h(_1, _2); } ++// CHECK-LABEL: @xvsubwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsubwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsubwod_h_b(_1, _2); } ++// CHECK-LABEL: @xvsubwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsubwod_q_du(_1, _2); } ++// CHECK-LABEL: @xvsubwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsubwod_d_wu(_1, _2); } ++// CHECK-LABEL: @xvsubwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsubwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsubwod_w_hu(_1, _2); } ++// CHECK-LABEL: @xvsubwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsubwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsubwod_h_bu(_1, _2); } ++// CHECK-LABEL: @xvmulwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmulwod_q_d(_1, _2); } ++// CHECK-LABEL: @xvmulwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmulwod_d_w(_1, _2); } ++// CHECK-LABEL: @xvmulwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmulwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmulwod_w_h(_1, _2); } ++// CHECK-LABEL: @xvmulwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmulwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmulwod_h_b(_1, _2); } ++// CHECK-LABEL: @xvmulwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// 
CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmulwod_q_du(_1, _2); } ++// CHECK-LABEL: @xvmulwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmulwod_d_wu(_1, _2); } ++// CHECK-LABEL: @xvmulwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmulwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmulwod_w_hu(_1, _2); } ++// CHECK-LABEL: @xvmulwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmulwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmulwod_h_bu(_1, _2); } ++// CHECK-LABEL: @xvaddwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwev_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvaddwev_d_wu_w(_1, _2); } ++// CHECK-LABEL: @xvaddwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvaddwev_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvaddwev_w_hu_h(_1, _2); } ++// CHECK-LABEL: @xvaddwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvaddwev_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvaddwev_h_bu_b(_1, _2); } ++// CHECK-LABEL: @xvmulwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwev_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvmulwev_d_wu_w(_1, _2); } ++// CHECK-LABEL: @xvmulwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmulwev_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvmulwev_w_hu_h(_1, _2); } ++// CHECK-LABEL: @xvmulwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmulwev_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvmulwev_h_bu_b(_1, _2); } ++// CHECK-LABEL: @xvaddwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwod_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvaddwod_d_wu_w(_1, _2); } ++// CHECK-LABEL: @xvaddwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail 
call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvaddwod_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvaddwod_w_hu_h(_1, _2); } ++// CHECK-LABEL: @xvaddwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvaddwod_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvaddwod_h_bu_b(_1, _2); } ++// CHECK-LABEL: @xvmulwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwod_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvmulwod_d_wu_w(_1, _2); } ++// CHECK-LABEL: @xvmulwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmulwod_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvmulwod_w_hu_h(_1, _2); } ++// CHECK-LABEL: @xvmulwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmulwod_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvmulwod_h_bu_b(_1, _2); } ++// CHECK-LABEL: @xvhaddw_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvhaddw_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvhaddw_q_d(_1, _2); } ++// CHECK-LABEL: @xvhaddw_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvhaddw_qu_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvhaddw_qu_du(_1, _2); } ++// CHECK-LABEL: @xvhsubw_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvhsubw_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvhsubw_q_d(_1, _2); } ++// CHECK-LABEL: @xvhsubw_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvhsubw_qu_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvhsubw_qu_du(_1, _2); } ++// CHECK-LABEL: @xvmaddwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaddwev_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwev_q_d(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaddwev_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return 
__builtin_lasx_xvmaddwev_d_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmaddwev_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwev_w_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmaddwev_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwev_h_b(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmaddwev_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __builtin_lasx_xvmaddwev_q_du(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmaddwev_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __builtin_lasx_xvmaddwev_d_wu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvmaddwev_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __builtin_lasx_xvmaddwev_w_hu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvmaddwev_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvmaddwev_h_bu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaddwod_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwod_q_d(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaddwod_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwod_d_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmaddwod_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwod_w_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// 
CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmaddwod_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwod_h_b(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmaddwod_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __builtin_lasx_xvmaddwod_q_du(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmaddwod_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __builtin_lasx_xvmaddwod_d_wu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvmaddwod_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __builtin_lasx_xvmaddwod_w_hu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvmaddwod_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvmaddwod_h_bu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaddwev_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwev_q_du_d(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaddwev_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwev_d_wu_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmaddwev_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwev_w_hu_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmaddwev_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwev_h_bu_b(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaddwod_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwod_q_du_d(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_d_wu_w( ++// CHECK-NEXT: entry: 
++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaddwod_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwod_d_wu_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmaddwod_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwod_w_hu_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmaddwod_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwod_h_bu_b(_1, _2, _3); } ++// CHECK-LABEL: @xvrotr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvrotr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvrotr_b(_1, _2); } ++// CHECK-LABEL: @xvrotr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvrotr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvrotr_h(_1, _2); } ++// CHECK-LABEL: @xvrotr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvrotr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvrotr_w(_1, _2); } ++// CHECK-LABEL: @xvrotr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvrotr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvrotr_d(_1, _2); } ++// CHECK-LABEL: @xvadd_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvadd_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadd_q(_1, _2); } ++// CHECK-LABEL: @xvsub_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsub_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsub_q(_1, _2); } ++// CHECK-LABEL: @xvaddwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwev_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvaddwev_q_du_d(_1, _2); } ++// CHECK-LABEL: @xvaddwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwod_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvaddwod_q_du_d(_1, 
_2); } ++// CHECK-LABEL: @xvmulwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwev_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvmulwev_q_du_d(_1, _2); } ++// CHECK-LABEL: @xvmulwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwod_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvmulwod_q_du_d(_1, _2); } ++// CHECK-LABEL: @xvmskgez_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmskgez_b(v32i8 _1) { return __builtin_lasx_xvmskgez_b(_1); } ++// CHECK-LABEL: @xvmsknz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmsknz_b(v32i8 _1) { return __builtin_lasx_xvmsknz_b(_1); } ++// CHECK-LABEL: @xvexth_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvexth_h_b(v32i8 _1) { return __builtin_lasx_xvexth_h_b(_1); } ++// CHECK-LABEL: @xvexth_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvexth_w_h(v16i16 _1) { return __builtin_lasx_xvexth_w_h(_1); } ++// CHECK-LABEL: @xvexth_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvexth_d_w(v8i32 _1) { return __builtin_lasx_xvexth_d_w(_1); } ++// CHECK-LABEL: @xvexth_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvexth_q_d(v4i64 _1) { return __builtin_lasx_xvexth_q_d(_1); } ++// CHECK-LABEL: @xvexth_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvexth_hu_bu(v32u8 _1) { return __builtin_lasx_xvexth_hu_bu(_1); } ++// CHECK-LABEL: @xvexth_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvexth_wu_hu(v16u16 _1) { return __builtin_lasx_xvexth_wu_hu(_1); } ++// CHECK-LABEL: @xvexth_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvexth_du_wu(v8u32 _1) { return __builtin_lasx_xvexth_du_wu(_1); } ++// CHECK-LABEL: @xvexth_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvexth_qu_du(v4u64 _1) { return __builtin_lasx_xvexth_qu_du(_1); } ++// CHECK-LABEL: @xvrotri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvrotri_b(v32i8 _1) { return __builtin_lasx_xvrotri_b(_1, 1); } ++// CHECK-LABEL: @xvrotri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvrotri_h(v16i16 _1) { return __builtin_lasx_xvrotri_h(_1, 1); } ++// CHECK-LABEL: @xvrotri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvrotri_w(v8i32 _1) { return __builtin_lasx_xvrotri_w(_1, 1); } ++// CHECK-LABEL: @xvrotri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvrotri_d(v4i64 _1) { return __builtin_lasx_xvrotri_d(_1, 1); } ++// CHECK-LABEL: @xvextl_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvextl_q_d(v4i64 _1) { return __builtin_lasx_xvextl_q_d(_1); } ++// CHECK-LABEL: @xvsrlni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlrni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlrni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlrni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlrni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlrni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> 
[[TMP0]] ++// ++v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlrni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlrni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlrni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrlni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrlni_bu_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrlni_hu_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrlni_wu_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrlni_du_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrlrni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 
x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrlrni_bu_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrni_hu_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrni_wu_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrni_du_q(_1, _2, 1); } ++// CHECK-LABEL: @xvsrani_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrani_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvsrani_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrani_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvsrani_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrani_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvsrani_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2) { return 
__builtin_lasx_xvsrani_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvsrarni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrarni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvsrarni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrarni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvsrarni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrarni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvsrarni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrarni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrani_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrani_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrani_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrani_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrani_bu_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrani_hu_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> [[_1:%.*]], <8 x 
i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrani_wu_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrani_du_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrarni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrarni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrarni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrarni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrarni_bu_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrarni_hu_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrarni_wu_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrarni_du_q(_1, _2, 1); } ++// CHECK-LABEL: @xbnz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbnz_b(v32u8 _1) { return __builtin_lasx_xbnz_b(_1); } ++// CHECK-LABEL: @xbnz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 
@llvm.loongarch.lasx.xbnz.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbnz_d(v4u64 _1) { return __builtin_lasx_xbnz_d(_1); } ++// CHECK-LABEL: @xbnz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbnz_h(v16u16 _1) { return __builtin_lasx_xbnz_h(_1); } ++// CHECK-LABEL: @xbnz_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbnz_v(v32u8 _1) { return __builtin_lasx_xbnz_v(_1); } ++// CHECK-LABEL: @xbnz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbnz_w(v8u32 _1) { return __builtin_lasx_xbnz_w(_1); } ++// CHECK-LABEL: @xbz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbz_b(v32u8 _1) { return __builtin_lasx_xbz_b(_1); } ++// CHECK-LABEL: @xbz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbz_d(v4u64 _1) { return __builtin_lasx_xbz_d(_1); } ++// CHECK-LABEL: @xbz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbz_h(v16u16 _1) { return __builtin_lasx_xbz_h(_1); } ++// CHECK-LABEL: @xbz_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbz_v(v32u8 _1) { return __builtin_lasx_xbz_v(_1); } ++// CHECK-LABEL: @xbz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbz_w(v8u32 _1) { return __builtin_lasx_xbz_w(_1); } ++// CHECK-LABEL: @xvfcmp_caf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_caf_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_caf_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_caf_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_caf_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_caf_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_ceq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_ceq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_ceq_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_ceq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_ceq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_ceq_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_cle_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cle_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cle_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_cle_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cle_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_clt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_clt_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_clt_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_clt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_clt_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_clt_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_cne_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cne_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_cne_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cne_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cor_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_cor_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cor_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cor_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_cor_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cor_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cueq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_cueq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cueq_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cueq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_cueq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cueq_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cule_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_cule_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cule_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cule_s( ++// CHECK-NEXT: entry: ++// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_cule_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cule_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cult_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_cult_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cult_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cult_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_cult_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cult_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cun_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_cun_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cun_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cune_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_cune_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cune_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cune_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_cune_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cune_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cun_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_cun_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cun_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_saf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_saf_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_saf_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_saf_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_saf_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_saf_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_seq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_seq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_seq_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_seq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_seq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_seq_s(_1, _2); } ++// 
CHECK-LABEL: @xvfcmp_sle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_sle_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sle_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sle_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_sle_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sle_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_slt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_slt_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_slt_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_slt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_slt_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_slt_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_sne_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sne_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_sne_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sne_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sor_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_sor_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sor_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sor_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_sor_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sor_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sueq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_sueq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sueq_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sueq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_sueq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sueq_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sule_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_sule_d(v4f64 _1, v4f64 _2) { return 
__builtin_lasx_xvfcmp_sule_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sule_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_sule_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sule_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sult_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_sult_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sult_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sult_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_sult_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sult_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sun_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_sun_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sun_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sune_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_sune_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sune_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sune_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_sune_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sune_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sun_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_sun_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sun_s(_1, _2); } ++// CHECK-LABEL: @xvpickve_d_f( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvpickve_d_f(v4f64 _1) { return __builtin_lasx_xvpickve_d_f(_1, 1); } ++// CHECK-LABEL: @xvpickve_w_f( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvpickve_w_f(v8f32 _1) { return __builtin_lasx_xvpickve_w_f(_1, 1); } ++// CHECK-LABEL: @xvrepli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvrepli_b() { return __builtin_lasx_xvrepli_b(1); } ++// CHECK-LABEL: @xvrepli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvrepli_d() { return __builtin_lasx_xvrepli_d(1); } ++// CHECK-LABEL: @xvrepli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> 
@llvm.loongarch.lasx.xvrepli.h(i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvrepli_h() { return __builtin_lasx_xvrepli_h(1); } ++// CHECK-LABEL: @xvrepli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvrepli_w() { return __builtin_lasx_xvrepli_w(1); } +-- +2.20.1 + diff --git a/0018-LoongArch-MC-Pre-commit-tests-for-instr-bl-fixupkind.patch b/0018-LoongArch-MC-Pre-commit-tests-for-instr-bl-fixupkind.patch new file mode 100644 index 0000000..d4e5385 --- /dev/null +++ b/0018-LoongArch-MC-Pre-commit-tests-for-instr-bl-fixupkind.patch @@ -0,0 +1,92 @@ +From 8e96f7bd7f5ef8a767bb92253c20ce4f997bec37 Mon Sep 17 00:00:00 2001 +From: ZhaoQi +Date: Tue, 21 Nov 2023 08:34:52 +0800 +Subject: [PATCH 18/27] [LoongArch][MC] Pre-commit tests for instr bl fixupkind + testing (#72826) + +This patch is used to test whether fixupkind for bl can be returned +correctly. When BL has target-flags(loongarch-call), there is no error. +But without this flag, an assertion error will appear. So the test is +just tagged as "Expectedly Failed" now until the following patch fix it. + +(cherry picked from commit 2ca028ce7c6de5f1350440012355a65383b8729a) +--- + .../CodeGen/LoongArch/test_bl_fixupkind.mir | 66 +++++++++++++++++++ + 1 file changed, 66 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir + +diff --git a/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir b/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir +new file mode 100644 +index 000000000000..2c1d41be7711 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir +@@ -0,0 +1,66 @@ ++## Tagged as "Expectedly Failed" until the following patch fix it ++# XFAIL: * ++# RUN: llc --mtriple=loongarch64 --filetype=obj %s -o - | \ ++# RUN: llvm-objdump -d - | FileCheck %s ++ ++# REQUIRES: asserts ++ ++## Check that bl can get fixupkind correctly. ++## When BL has target-flags(loongarch-call), there is no error. But without ++## this flag, an assertion error will appear: ++## Assertion `FixupKind != LoongArch::fixup_loongarch_invalid && "Unhandled expression!"' failed. ++ ++--- | ++ target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" ++ target triple = "loongarch64" ++ ++ define dso_local void @test_bl_fixupkind_with_flag() { ++ ; CHECK-LABEL: test_bl_fixupkind_with_flag ++ ; CHECK: addi.d $sp, $sp, -16 ++ ; CHECK-NEXT: st.d $ra, $sp, 8 ++ ; CHECK-NEXT: bl 0 ++ ; CHECK-NEXT: ld.d $ra, $sp, 8 ++ ; CHECK-NEXT: addi.d $sp, $sp, 16 ++ ; CHECK-NEXT: ret ++ entry: ++ call void @foo() ++ ret void ++ } ++ ++ define dso_local void @test_bl_fixupkind_without_flag() { ++ ; CHECK-LABEL: test_bl_fixupkind_without_flag ++ ; CHECK: addi.d $sp, $sp, -16 ++ ; CHECK-NEXT: st.d $ra, $sp, 8 ++ ; CHECK-NEXT: bl 0 ++ ; CHECK-NEXT: ld.d $ra, $sp, 8 ++ ; CHECK-NEXT: addi.d $sp, $sp, 16 ++ ; CHECK-NEXT: ret ++ entry: ++ call void @foo() ++ ret void ++ } ++ ++ declare dso_local void @foo(...) ++... ++--- ++name: test_bl_fixupkind_with_flag ++tracksRegLiveness: true ++body: | ++ bb.0.entry: ++ ADJCALLSTACKDOWN 0, 0, implicit-def dead $r3, implicit $r3 ++ BL target-flags(loongarch-call) @foo, csr_ilp32d_lp64d, implicit-def $r1, implicit-def dead $r1, implicit-def $r3 ++ ADJCALLSTACKUP 0, 0, implicit-def dead $r3, implicit $r3 ++ PseudoRET ++ ++... 
++--- ++name: test_bl_fixupkind_without_flag ++tracksRegLiveness: true ++body: | ++ bb.0.entry: ++ ADJCALLSTACKDOWN 0, 0, implicit-def dead $r3, implicit $r3 ++ BL @foo, csr_ilp32d_lp64d, implicit-def $r1, implicit-def dead $r1, implicit-def $r3 ++ ADJCALLSTACKUP 0, 0, implicit-def dead $r3, implicit $r3 ++ PseudoRET ++ ++... +-- +2.20.1 + diff --git a/0019-LoongArch-Add-codegen-support-for-extractelement-737.patch b/0019-LoongArch-Add-codegen-support-for-extractelement-737.patch new file mode 100644 index 0000000..10bce02 --- /dev/null +++ b/0019-LoongArch-Add-codegen-support-for-extractelement-737.patch @@ -0,0 +1,516 @@ +From 5755e6dc93999099ef4825971cd613cf1754ab6c Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Thu, 30 Nov 2023 17:29:18 +0800 +Subject: [PATCH 19/42] [LoongArch] Add codegen support for extractelement + (#73759) + +Add codegen support for extractelement when enable `lsx` or `lasx` +feature. + +(cherry picked from commit b72456120f1db38ed7068fb592fcf768c6d5cce2) + +--- + .../LoongArch/LoongArchISelLowering.cpp | 2 + + .../Target/LoongArch/LoongArchInstrInfo.cpp | 8 + + .../LoongArch/LoongArchLASXInstrInfo.td | 38 ++++ + .../Target/LoongArch/LoongArchLSXInstrInfo.td | 38 ++++ + .../lasx/ir-instruction/extractelement.ll | 172 ++++++++++++++++++ + .../lsx/ir-instruction/extractelement.ll | 170 +++++++++++++++++ + 6 files changed, 428 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index d3627cec2e8c..26e94a53b344 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -238,6 +238,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + // will be `Custom` handled in the future. + setOperationAction(ISD::BUILD_VECTOR, VT, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); ++ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); + } + for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { + setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); +@@ -267,6 +268,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + // FIXME: Same as above. + setOperationAction(ISD::BUILD_VECTOR, VT, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); ++ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); + } + for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) { + setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +index ddd1c9943fac..6576100d3b32 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +@@ -90,6 +90,14 @@ void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + Opc = LoongArch::FMOV_S; + } else if (LoongArch::FPR64RegClass.contains(DstReg, SrcReg)) { + Opc = LoongArch::FMOV_D; ++ } else if (LoongArch::GPRRegClass.contains(DstReg) && ++ LoongArch::FPR32RegClass.contains(SrcReg)) { ++ // FPR32 -> GPR copies ++ Opc = LoongArch::MOVFR2GR_S; ++ } else if (LoongArch::GPRRegClass.contains(DstReg) && ++ LoongArch::FPR64RegClass.contains(SrcReg)) { ++ // FPR64 -> GPR copies ++ Opc = LoongArch::MOVFR2GR_D; + } else { + // TODO: support other copies. 
+ llvm_unreachable("Impossible reg-to-reg copy"); +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index e19aa92266b1..380206ddcf10 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -1401,6 +1401,44 @@ foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in { + def : RegRegStPat; + } + ++// Vector extraction with constant index. ++def : Pat<(i64 (vector_extract v32i8:$xj, uimm4:$imm)), ++ (VPICKVE2GR_B (EXTRACT_SUBREG v32i8:$xj, sub_128), uimm4:$imm)>; ++def : Pat<(i64 (vector_extract v16i16:$xj, uimm3:$imm)), ++ (VPICKVE2GR_H (EXTRACT_SUBREG v16i16:$xj, sub_128), uimm3:$imm)>; ++def : Pat<(i64 (vector_extract v8i32:$xj, uimm2:$imm)), ++ (VPICKVE2GR_W (EXTRACT_SUBREG v8i32:$xj, sub_128), uimm2:$imm)>; ++def : Pat<(i64 (vector_extract v4i64:$xj, uimm1:$imm)), ++ (VPICKVE2GR_D (EXTRACT_SUBREG v4i64:$xj, sub_128), uimm1:$imm)>; ++def : Pat<(f32 (vector_extract v8f32:$xj, uimm2:$imm)), ++ (f32 (EXTRACT_SUBREG (XVREPL128VEI_W v8f32:$xj, uimm2:$imm), sub_32))>; ++def : Pat<(f64 (vector_extract v4f64:$xj, uimm1:$imm)), ++ (f64 (EXTRACT_SUBREG (XVREPL128VEI_D v4f64:$xj, uimm1:$imm), sub_64))>; ++ ++// Vector extraction with variable index. ++def : Pat<(i64 (vector_extract v32i8:$xj, i64:$rk)), ++ (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_B v32i8:$xj, ++ i64:$rk), ++ sub_32)), ++ GPR), (i64 24))>; ++def : Pat<(i64 (vector_extract v16i16:$xj, i64:$rk)), ++ (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_H v16i16:$xj, ++ i64:$rk), ++ sub_32)), ++ GPR), (i64 16))>; ++def : Pat<(i64 (vector_extract v8i32:$xj, i64:$rk)), ++ (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_W v8i32:$xj, i64:$rk), ++ sub_32)), ++ GPR)>; ++def : Pat<(i64 (vector_extract v4i64:$xj, i64:$rk)), ++ (COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (XVREPLVE_D v4i64:$xj, i64:$rk), ++ sub_64)), ++ GPR)>; ++def : Pat<(f32 (vector_extract v8f32:$xj, i64:$rk)), ++ (f32 (EXTRACT_SUBREG (XVREPLVE_W v8f32:$xj, i64:$rk), sub_32))>; ++def : Pat<(f64 (vector_extract v4f64:$xj, i64:$rk)), ++ (f64 (EXTRACT_SUBREG (XVREPLVE_D v4f64:$xj, i64:$rk), sub_64))>; ++ + } // Predicates = [HasExtLASX] + + /// Intrinsic pattern +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index 9391b1a8a20c..980870e34503 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -1501,6 +1501,44 @@ foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { + def : RegRegStPat; + } + ++// Vector extraction with constant index. ++def : Pat<(i64 (vector_extract v16i8:$vj, uimm4:$imm)), ++ (VPICKVE2GR_B v16i8:$vj, uimm4:$imm)>; ++def : Pat<(i64 (vector_extract v8i16:$vj, uimm3:$imm)), ++ (VPICKVE2GR_H v8i16:$vj, uimm3:$imm)>; ++def : Pat<(i64 (vector_extract v4i32:$vj, uimm2:$imm)), ++ (VPICKVE2GR_W v4i32:$vj, uimm2:$imm)>; ++def : Pat<(i64 (vector_extract v2i64:$vj, uimm1:$imm)), ++ (VPICKVE2GR_D v2i64:$vj, uimm1:$imm)>; ++def : Pat<(f32 (vector_extract v4f32:$vj, uimm2:$imm)), ++ (f32 (EXTRACT_SUBREG (VREPLVEI_W v4f32:$vj, uimm2:$imm), sub_32))>; ++def : Pat<(f64 (vector_extract v2f64:$vj, uimm1:$imm)), ++ (f64 (EXTRACT_SUBREG (VREPLVEI_D v2f64:$vj, uimm1:$imm), sub_64))>; ++ ++// Vector extraction with variable index. 
++def : Pat<(i64 (vector_extract v16i8:$vj, i64:$rk)), ++ (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_B v16i8:$vj, ++ i64:$rk), ++ sub_32)), ++ GPR), (i64 24))>; ++def : Pat<(i64 (vector_extract v8i16:$vj, i64:$rk)), ++ (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_H v8i16:$vj, ++ i64:$rk), ++ sub_32)), ++ GPR), (i64 16))>; ++def : Pat<(i64 (vector_extract v4i32:$vj, i64:$rk)), ++ (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_W v4i32:$vj, i64:$rk), ++ sub_32)), ++ GPR)>; ++def : Pat<(i64 (vector_extract v2i64:$vj, i64:$rk)), ++ (COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (VREPLVE_D v2i64:$vj, i64:$rk), ++ sub_64)), ++ GPR)>; ++def : Pat<(f32 (vector_extract v4f32:$vj, i64:$rk)), ++ (f32 (EXTRACT_SUBREG (VREPLVE_W v4f32:$vj, i64:$rk), sub_32))>; ++def : Pat<(f64 (vector_extract v2f64:$vj, i64:$rk)), ++ (f64 (EXTRACT_SUBREG (VREPLVE_D v2f64:$vj, i64:$rk), sub_64))>; ++ + } // Predicates = [HasExtLSX] + + /// Intrinsic pattern +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll +new file mode 100644 +index 000000000000..78f584cd09a8 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll +@@ -0,0 +1,172 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @extract_32xi8(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: extract_32xi8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1 ++; CHECK-NEXT: st.b $a0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <32 x i8>, ptr %src ++ %e = extractelement <32 x i8> %v, i32 1 ++ store i8 %e, ptr %dst ++ ret void ++} ++ ++define void @extract_16xi16(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: extract_16xi16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1 ++; CHECK-NEXT: st.h $a0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <16 x i16>, ptr %src ++ %e = extractelement <16 x i16> %v, i32 1 ++ store i16 %e, ptr %dst ++ ret void ++} ++ ++define void @extract_8xi32(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: extract_8xi32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1 ++; CHECK-NEXT: st.w $a0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <8 x i32>, ptr %src ++ %e = extractelement <8 x i32> %v, i32 1 ++ store i32 %e, ptr %dst ++ ret void ++} ++ ++define void @extract_4xi64(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: extract_4xi64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1 ++; CHECK-NEXT: st.d $a0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <4 x i64>, ptr %src ++ %e = extractelement <4 x i64> %v, i32 1 ++ store i64 %e, ptr %dst ++ ret void ++} ++ ++define void @extract_8xfloat(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: extract_8xfloat: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: ori $a0, $zero, 7 ++; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a0 ++; CHECK-NEXT: fst.s $fa0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <8 x float>, ptr %src ++ %e = extractelement <8 x float> %v, i32 7 ++ store float %e, ptr %dst ++ ret void ++} ++ ++define void @extract_4xdouble(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: extract_4xdouble: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: ori $a0, $zero, 3 ++; CHECK-NEXT: 
xvreplve.d $xr0, $xr0, $a0 ++; CHECK-NEXT: fst.d $fa0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <4 x double>, ptr %src ++ %e = extractelement <4 x double> %v, i32 3 ++ store double %e, ptr %dst ++ ret void ++} ++ ++define void @extract_32xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ++; CHECK-LABEL: extract_32xi8_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvreplve.b $xr0, $xr0, $a2 ++; CHECK-NEXT: movfr2gr.s $a0, $fa0 ++; CHECK-NEXT: srai.w $a0, $a0, 24 ++; CHECK-NEXT: st.b $a0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <32 x i8>, ptr %src ++ %e = extractelement <32 x i8> %v, i32 %idx ++ store i8 %e, ptr %dst ++ ret void ++} ++ ++define void @extract_16xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ++; CHECK-LABEL: extract_16xi16_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvreplve.h $xr0, $xr0, $a2 ++; CHECK-NEXT: movfr2gr.s $a0, $fa0 ++; CHECK-NEXT: srai.w $a0, $a0, 16 ++; CHECK-NEXT: st.h $a0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <16 x i16>, ptr %src ++ %e = extractelement <16 x i16> %v, i32 %idx ++ store i16 %e, ptr %dst ++ ret void ++} ++ ++define void @extract_8xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ++; CHECK-LABEL: extract_8xi32_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a2 ++; CHECK-NEXT: movfr2gr.s $a0, $fa0 ++; CHECK-NEXT: st.w $a0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <8 x i32>, ptr %src ++ %e = extractelement <8 x i32> %v, i32 %idx ++ store i32 %e, ptr %dst ++ ret void ++} ++ ++define void @extract_4xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ++; CHECK-LABEL: extract_4xi64_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a2 ++; CHECK-NEXT: movfr2gr.d $a0, $fa0 ++; CHECK-NEXT: st.d $a0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <4 x i64>, ptr %src ++ %e = extractelement <4 x i64> %v, i32 %idx ++ store i64 %e, ptr %dst ++ ret void ++} ++ ++define void @extract_8xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ++; CHECK-LABEL: extract_8xfloat_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a2 ++; CHECK-NEXT: fst.s $fa0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <8 x float>, ptr %src ++ %e = extractelement <8 x float> %v, i32 %idx ++ store float %e, ptr %dst ++ ret void ++} ++ ++define void @extract_4xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ++; CHECK-LABEL: extract_4xdouble_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a2 ++; CHECK-NEXT: fst.d $fa0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <4 x double>, ptr %src ++ %e = extractelement <4 x double> %v, i32 %idx ++ store double %e, ptr %dst ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll +new file mode 100644 +index 000000000000..b8798c97861e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll +@@ -0,0 +1,170 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | 
FileCheck %s ++ ++define void @extract_16xi8(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: extract_16xi8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1 ++; CHECK-NEXT: st.b $a0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <16 x i8>, ptr %src ++ %e = extractelement <16 x i8> %v, i32 1 ++ store i8 %e, ptr %dst ++ ret void ++} ++ ++define void @extract_8xi16(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: extract_8xi16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1 ++; CHECK-NEXT: st.h $a0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <8 x i16>, ptr %src ++ %e = extractelement <8 x i16> %v, i32 1 ++ store i16 %e, ptr %dst ++ ret void ++} ++ ++define void @extract_4xi32(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: extract_4xi32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1 ++; CHECK-NEXT: st.w $a0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <4 x i32>, ptr %src ++ %e = extractelement <4 x i32> %v, i32 1 ++ store i32 %e, ptr %dst ++ ret void ++} ++ ++define void @extract_2xi64(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: extract_2xi64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1 ++; CHECK-NEXT: st.d $a0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <2 x i64>, ptr %src ++ %e = extractelement <2 x i64> %v, i32 1 ++ store i64 %e, ptr %dst ++ ret void ++} ++ ++define void @extract_4xfloat(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: extract_4xfloat: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vreplvei.w $vr0, $vr0, 1 ++; CHECK-NEXT: fst.s $fa0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <4 x float>, ptr %src ++ %e = extractelement <4 x float> %v, i32 1 ++ store float %e, ptr %dst ++ ret void ++} ++ ++define void @extract_2xdouble(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: extract_2xdouble: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vreplvei.d $vr0, $vr0, 1 ++; CHECK-NEXT: fst.d $fa0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <2 x double>, ptr %src ++ %e = extractelement <2 x double> %v, i32 1 ++ store double %e, ptr %dst ++ ret void ++} ++ ++define void @extract_16xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ++; CHECK-LABEL: extract_16xi8_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vreplve.b $vr0, $vr0, $a2 ++; CHECK-NEXT: movfr2gr.s $a0, $fa0 ++; CHECK-NEXT: srai.w $a0, $a0, 24 ++; CHECK-NEXT: st.b $a0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <16 x i8>, ptr %src ++ %e = extractelement <16 x i8> %v, i32 %idx ++ store i8 %e, ptr %dst ++ ret void ++} ++ ++define void @extract_8xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ++; CHECK-LABEL: extract_8xi16_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vreplve.h $vr0, $vr0, $a2 ++; CHECK-NEXT: movfr2gr.s $a0, $fa0 ++; CHECK-NEXT: srai.w $a0, $a0, 16 ++; CHECK-NEXT: st.h $a0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <8 x i16>, ptr %src ++ %e = extractelement <8 x i16> %v, i32 %idx ++ store i16 %e, ptr %dst ++ ret void ++} ++ ++define void @extract_4xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ++; CHECK-LABEL: extract_4xi32_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vreplve.w $vr0, $vr0, $a2 ++; CHECK-NEXT: movfr2gr.s $a0, 
$fa0 ++; CHECK-NEXT: st.w $a0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <4 x i32>, ptr %src ++ %e = extractelement <4 x i32> %v, i32 %idx ++ store i32 %e, ptr %dst ++ ret void ++} ++ ++define void @extract_2xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ++; CHECK-LABEL: extract_2xi64_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vreplve.d $vr0, $vr0, $a2 ++; CHECK-NEXT: movfr2gr.d $a0, $fa0 ++; CHECK-NEXT: st.d $a0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <2 x i64>, ptr %src ++ %e = extractelement <2 x i64> %v, i32 %idx ++ store i64 %e, ptr %dst ++ ret void ++} ++ ++define void @extract_4xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ++; CHECK-LABEL: extract_4xfloat_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vreplve.w $vr0, $vr0, $a2 ++; CHECK-NEXT: fst.s $fa0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <4 x float>, ptr %src ++ %e = extractelement <4 x float> %v, i32 %idx ++ store float %e, ptr %dst ++ ret void ++} ++ ++define void @extract_2xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ++; CHECK-LABEL: extract_2xdouble_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vreplve.d $vr0, $vr0, $a2 ++; CHECK-NEXT: fst.d $fa0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <2 x double>, ptr %src ++ %e = extractelement <2 x double> %v, i32 %idx ++ store double %e, ptr %dst ++ ret void ++} +-- +2.20.1 + diff --git a/0019-LoongArch-Add-definitions-and-feature-frecipe-for-FP.patch b/0019-LoongArch-Add-definitions-and-feature-frecipe-for-FP.patch new file mode 100644 index 0000000..e675893 --- /dev/null +++ b/0019-LoongArch-Add-definitions-and-feature-frecipe-for-FP.patch @@ -0,0 +1,876 @@ +From 15497a9b983a09cff368feb019820828a40b57bb Mon Sep 17 00:00:00 2001 +From: Ami-zhang +Date: Tue, 23 Jan 2024 14:24:58 +0800 +Subject: [PATCH 19/23] [LoongArch] Add definitions and feature 'frecipe' for + FP approximation intrinsics/builtins (#78962) + +This PR adds definitions and 'frecipe' feature for FP approximation +intrinsics/builtins. In additions, this adds and complements relative +testcases. 
+ +(cherry picked from commit fcb8342a219ada8ec641790a4c8a9f969d7d64ee) +--- + .../clang/Basic/BuiltinsLoongArchBase.def | 5 +++ + .../clang/Basic/BuiltinsLoongArchLASX.def | 6 +++ + .../clang/Basic/BuiltinsLoongArchLSX.def | 6 +++ + clang/lib/Headers/larchintrin.h | 12 +++++ + clang/lib/Headers/lasxintrin.h | 24 ++++++++++ + clang/lib/Headers/lsxintrin.h | 24 ++++++++++ + .../LoongArch/builtin-dbl-approximate.c | 45 +++++++++++++++++++ + .../LoongArch/builtin-flt-approximate.c | 45 +++++++++++++++++++ + .../CodeGen/LoongArch/intrinsic-la64-error.c | 21 +++++++++ + .../lasx/builtin-approximate-alias.c | 37 +++++++++++++++ + .../LoongArch/lasx/builtin-approximate.c | 38 ++++++++++++++++ + .../LoongArch/lsx/builtin-approximate-alias.c | 37 +++++++++++++++ + .../LoongArch/lsx/builtin-approximate.c | 38 ++++++++++++++++ + llvm/include/llvm/IR/IntrinsicsLoongArch.td | 13 ++++++ + llvm/lib/Target/LoongArch/LoongArch.td | 7 +++ + .../LoongArch/LoongArchFloat32InstrInfo.td | 6 +++ + .../LoongArch/LoongArchFloat64InstrInfo.td | 6 +++ + .../LoongArch/LoongArchLASXInstrInfo.td | 10 +++++ + .../Target/LoongArch/LoongArchLSXInstrInfo.td | 10 +++++ + .../lib/Target/LoongArch/LoongArchSubtarget.h | 2 + + .../LoongArch/intrinsic-frecipe-dbl.ll | 26 +++++++++++ + .../LoongArch/intrinsic-frecipe-flt.ll | 26 +++++++++++ + .../LoongArch/lasx/intrinsic-frecipe.ll | 26 +++++++++++ + .../LoongArch/lasx/intrinsic-frsqrte.ll | 26 +++++++++++ + .../LoongArch/lsx/intrinsic-frecipe.ll | 26 +++++++++++ + .../LoongArch/lsx/intrinsic-frsqrte.ll | 26 +++++++++++ + 26 files changed, 548 insertions(+) + create mode 100644 clang/test/CodeGen/LoongArch/builtin-dbl-approximate.c + create mode 100644 clang/test/CodeGen/LoongArch/builtin-flt-approximate.c + create mode 100644 clang/test/CodeGen/LoongArch/lasx/builtin-approximate-alias.c + create mode 100644 clang/test/CodeGen/LoongArch/lasx/builtin-approximate.c + create mode 100644 clang/test/CodeGen/LoongArch/lsx/builtin-approximate-alias.c + create mode 100644 clang/test/CodeGen/LoongArch/lsx/builtin-approximate.c + create mode 100644 llvm/test/CodeGen/LoongArch/intrinsic-frecipe-dbl.ll + create mode 100644 llvm/test/CodeGen/LoongArch/intrinsic-frecipe-flt.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecipe.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrte.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecipe.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrte.ll + +diff --git a/clang/include/clang/Basic/BuiltinsLoongArchBase.def b/clang/include/clang/Basic/BuiltinsLoongArchBase.def +index cbb239223aae..a5a07c167908 100644 +--- a/clang/include/clang/Basic/BuiltinsLoongArchBase.def ++++ b/clang/include/clang/Basic/BuiltinsLoongArchBase.def +@@ -51,3 +51,8 @@ TARGET_BUILTIN(__builtin_loongarch_iocsrwr_d, "vUWiUi", "nc", "64bit") + + TARGET_BUILTIN(__builtin_loongarch_lddir_d, "WiWiIUWi", "nc", "64bit") + TARGET_BUILTIN(__builtin_loongarch_ldpte_d, "vWiIUWi", "nc", "64bit") ++ ++TARGET_BUILTIN(__builtin_loongarch_frecipe_s, "ff", "nc", "f,frecipe") ++TARGET_BUILTIN(__builtin_loongarch_frecipe_d, "dd", "nc", "d,frecipe") ++TARGET_BUILTIN(__builtin_loongarch_frsqrte_s, "ff", "nc", "f,frecipe") ++TARGET_BUILTIN(__builtin_loongarch_frsqrte_d, "dd", "nc", "d,frecipe") +diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLASX.def b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def +index 3de200f665b6..4cf51cc000f6 100644 +--- a/clang/include/clang/Basic/BuiltinsLoongArchLASX.def ++++ 
b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def +@@ -657,9 +657,15 @@ TARGET_BUILTIN(__builtin_lasx_xvfsqrt_d, "V4dV4d", "nc", "lasx") + TARGET_BUILTIN(__builtin_lasx_xvfrecip_s, "V8fV8f", "nc", "lasx") + TARGET_BUILTIN(__builtin_lasx_xvfrecip_d, "V4dV4d", "nc", "lasx") + ++TARGET_BUILTIN(__builtin_lasx_xvfrecipe_s, "V8fV8f", "nc", "lasx,frecipe") ++TARGET_BUILTIN(__builtin_lasx_xvfrecipe_d, "V4dV4d", "nc", "lasx,frecipe") ++ + TARGET_BUILTIN(__builtin_lasx_xvfrsqrt_s, "V8fV8f", "nc", "lasx") + TARGET_BUILTIN(__builtin_lasx_xvfrsqrt_d, "V4dV4d", "nc", "lasx") + ++TARGET_BUILTIN(__builtin_lasx_xvfrsqrte_s, "V8fV8f", "nc", "lasx,frecipe") ++TARGET_BUILTIN(__builtin_lasx_xvfrsqrte_d, "V4dV4d", "nc", "lasx,frecipe") ++ + TARGET_BUILTIN(__builtin_lasx_xvfcvtl_s_h, "V8fV16s", "nc", "lasx") + TARGET_BUILTIN(__builtin_lasx_xvfcvth_s_h, "V8fV16s", "nc", "lasx") + TARGET_BUILTIN(__builtin_lasx_xvfcvtl_d_s, "V4dV8f", "nc", "lasx") +diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLSX.def b/clang/include/clang/Basic/BuiltinsLoongArchLSX.def +index 8e6aec886c50..c90f4dc5458f 100644 +--- a/clang/include/clang/Basic/BuiltinsLoongArchLSX.def ++++ b/clang/include/clang/Basic/BuiltinsLoongArchLSX.def +@@ -641,9 +641,15 @@ TARGET_BUILTIN(__builtin_lsx_vfsqrt_d, "V2dV2d", "nc", "lsx") + TARGET_BUILTIN(__builtin_lsx_vfrecip_s, "V4fV4f", "nc", "lsx") + TARGET_BUILTIN(__builtin_lsx_vfrecip_d, "V2dV2d", "nc", "lsx") + ++TARGET_BUILTIN(__builtin_lsx_vfrecipe_s, "V4fV4f", "nc", "lsx,frecipe") ++TARGET_BUILTIN(__builtin_lsx_vfrecipe_d, "V2dV2d", "nc", "lsx,frecipe") ++ + TARGET_BUILTIN(__builtin_lsx_vfrsqrt_s, "V4fV4f", "nc", "lsx") + TARGET_BUILTIN(__builtin_lsx_vfrsqrt_d, "V2dV2d", "nc", "lsx") + ++TARGET_BUILTIN(__builtin_lsx_vfrsqrte_s, "V4fV4f", "nc", "lsx,frecipe") ++TARGET_BUILTIN(__builtin_lsx_vfrsqrte_d, "V2dV2d", "nc", "lsx,frecipe") ++ + TARGET_BUILTIN(__builtin_lsx_vfcvtl_s_h, "V4fV8s", "nc", "lsx") + TARGET_BUILTIN(__builtin_lsx_vfcvtl_d_s, "V2dV4f", "nc", "lsx") + +diff --git a/clang/lib/Headers/larchintrin.h b/clang/lib/Headers/larchintrin.h +index 24dd29ce91ff..f4218295919a 100644 +--- a/clang/lib/Headers/larchintrin.h ++++ b/clang/lib/Headers/larchintrin.h +@@ -228,6 +228,18 @@ extern __inline void + ((void)__builtin_loongarch_ldpte_d((long int)(_1), (_2))) + #endif + ++#define __frecipe_s(/*float*/ _1) \ ++ (float)__builtin_loongarch_frecipe_s((float)_1) ++ ++#define __frecipe_d(/*double*/ _1) \ ++ (double)__builtin_loongarch_frecipe_d((double)_1) ++ ++#define __frsqrte_s(/*float*/ _1) \ ++ (float)__builtin_loongarch_frsqrte_s((float)_1) ++ ++#define __frsqrte_d(/*double*/ _1) \ ++ (double)__builtin_loongarch_frsqrte_d((double)_1) ++ + #ifdef __cplusplus + } + #endif +diff --git a/clang/lib/Headers/lasxintrin.h b/clang/lib/Headers/lasxintrin.h +index 6b4d5012a24b..dafc2a2f3e6a 100644 +--- a/clang/lib/Headers/lasxintrin.h ++++ b/clang/lib/Headers/lasxintrin.h +@@ -1726,6 +1726,18 @@ extern __inline + return (__m256d)__builtin_lasx_xvfrecip_d((v4f64)_1); + } + ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrecipe_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrecipe_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrecipe_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrecipe_d((v4f64)_1); ++} ++ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrint_s(__m256 _1) { +@@ -1750,6 +1762,18 
@@ extern __inline + return (__m256d)__builtin_lasx_xvfrsqrt_d((v4f64)_1); + } + ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrsqrte_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrsqrte_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrsqrte_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrsqrte_d((v4f64)_1); ++} ++ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvflogb_s(__m256 _1) { +diff --git a/clang/lib/Headers/lsxintrin.h b/clang/lib/Headers/lsxintrin.h +index a29bc7757ab5..f347955ce6fb 100644 +--- a/clang/lib/Headers/lsxintrin.h ++++ b/clang/lib/Headers/lsxintrin.h +@@ -1776,6 +1776,18 @@ extern __inline + return (__m128d)__builtin_lsx_vfrecip_d((v2f64)_1); + } + ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrecipe_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrecipe_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrecipe_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrecipe_d((v2f64)_1); ++} ++ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrint_s(__m128 _1) { +@@ -1800,6 +1812,18 @@ extern __inline + return (__m128d)__builtin_lsx_vfrsqrt_d((v2f64)_1); + } + ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrsqrte_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrsqrte_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrsqrte_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrsqrte_d((v2f64)_1); ++} ++ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vflogb_s(__m128 _1) { +diff --git a/clang/test/CodeGen/LoongArch/builtin-dbl-approximate.c b/clang/test/CodeGen/LoongArch/builtin-dbl-approximate.c +new file mode 100644 +index 000000000000..e5fe684346c0 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/builtin-dbl-approximate.c +@@ -0,0 +1,45 @@ ++// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 ++// RUN: %clang_cc1 -triple loongarch32 -target-feature +d -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +d -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s ++ ++#include ++ ++// CHECK-LABEL: @frecipe_d ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call double @llvm.loongarch.frecipe.d(double [[A:%.*]]) ++// CHECK-NEXT: ret double [[TMP0]] ++// ++double frecipe_d (double _1) ++{ ++ return __builtin_loongarch_frecipe_d (_1); ++} ++ ++// CHECK-LABEL: @frsqrte_d ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call double @llvm.loongarch.frsqrte.d(double [[A:%.*]]) ++// CHECK-NEXT: ret double [[TMP0]] ++// ++double frsqrte_d (double _1) ++{ ++ return __builtin_loongarch_frsqrte_d (_1); ++} ++ ++// CHECK-LABEL: @frecipe_d_alia ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call double @llvm.loongarch.frecipe.d(double [[A:%.*]]) ++// CHECK-NEXT: ret double [[TMP0]] ++// ++double frecipe_d_alia (double _1) ++{ ++ return __frecipe_d (_1); ++} ++ ++// CHECK-LABEL: @frsqrte_d_alia ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = 
tail call double @llvm.loongarch.frsqrte.d(double [[A:%.*]]) ++// CHECK-NEXT: ret double [[TMP0]] ++// ++double frsqrte_d_alia (double _1) ++{ ++ return __frsqrte_d (_1); ++} +diff --git a/clang/test/CodeGen/LoongArch/builtin-flt-approximate.c b/clang/test/CodeGen/LoongArch/builtin-flt-approximate.c +new file mode 100644 +index 000000000000..47bb47084364 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/builtin-flt-approximate.c +@@ -0,0 +1,45 @@ ++// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 ++// RUN: %clang_cc1 -triple loongarch32 -target-feature +f -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s ++ ++#include ++ ++// CHECK-LABEL: @frecipe_s ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call float @llvm.loongarch.frecipe.s(float [[A:%.*]]) ++// CHECK-NEXT: ret float [[TMP0]] ++// ++float frecipe_s (float _1) ++{ ++ return __builtin_loongarch_frecipe_s (_1); ++} ++ ++// CHECK-LABEL: @frsqrte_s ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call float @llvm.loongarch.frsqrte.s(float [[A:%.*]]) ++// CHECK-NEXT: ret float [[TMP0]] ++// ++float frsqrte_s (float _1) ++{ ++ return __builtin_loongarch_frsqrte_s (_1); ++} ++ ++// CHECK-LABEL: @frecipe_s_alia ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call float @llvm.loongarch.frecipe.s(float [[A:%.*]]) ++// CHECK-NEXT: ret float [[TMP0]] ++// ++float frecipe_s_alia (float _1) ++{ ++ return __frecipe_s (_1); ++} ++ ++// CHECK-LABEL: @frsqrte_s_alia ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call float @llvm.loongarch.frsqrte.s(float [[A:%.*]]) ++// CHECK-NEXT: ret float [[TMP0]] ++// ++float frsqrte_s_alia (float _1) ++{ ++ return __frsqrte_s (_1); ++} +diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la64-error.c b/clang/test/CodeGen/LoongArch/intrinsic-la64-error.c +index efb3b94175cf..a3242dfd41e9 100644 +--- a/clang/test/CodeGen/LoongArch/intrinsic-la64-error.c ++++ b/clang/test/CodeGen/LoongArch/intrinsic-la64-error.c +@@ -1,7 +1,28 @@ + // RUN: %clang_cc1 -triple loongarch64 -emit-llvm -S -verify %s -o /dev/null ++// RUN: not %clang_cc1 -triple loongarch64 -DFEATURE_CHECK -emit-llvm %s -o /dev/null 2>&1 \ ++// RUN: | FileCheck %s + + #include + ++#ifdef FEATURE_CHECK ++void test_feature(unsigned long *v_ul, int *v_i, float a, double b) { ++// CHECK: error: '__builtin_loongarch_cacop_w' needs target feature 32bit ++ __builtin_loongarch_cacop_w(1, v_ul[0], 1024); ++// CHECK: error: '__builtin_loongarch_movfcsr2gr' needs target feature f ++ v_i[0] = __builtin_loongarch_movfcsr2gr(1); ++// CHECK: error: '__builtin_loongarch_movgr2fcsr' needs target feature f ++ __builtin_loongarch_movgr2fcsr(1, v_i[1]); ++// CHECK: error: '__builtin_loongarch_frecipe_s' needs target feature f,frecipe ++ float f1 = __builtin_loongarch_frecipe_s(a); ++// CHECK: error: '__builtin_loongarch_frsqrte_s' needs target feature f,frecipe ++ float f2 = __builtin_loongarch_frsqrte_s(a); ++// CHECK: error: '__builtin_loongarch_frecipe_d' needs target feature d,frecipe ++ double d1 = __builtin_loongarch_frecipe_d(b); ++// CHECK: error: '__builtin_loongarch_frsqrte_d' needs target feature d,frecipe ++ double d2 = __builtin_loongarch_frsqrte_d(b); ++} ++#endif ++ + void csrrd_d(int a) { + __builtin_loongarch_csrrd_d(16384); // expected-error {{argument value 16384 is outside the valid range [0, 16383]}} + 
__builtin_loongarch_csrrd_d(-1); // expected-error {{argument value 4294967295 is outside the valid range [0, 16383]}} +diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-approximate-alias.c b/clang/test/CodeGen/LoongArch/lasx/builtin-approximate-alias.c +new file mode 100644 +index 000000000000..b79f93940399 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/lasx/builtin-approximate-alias.c +@@ -0,0 +1,37 @@ ++// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s ++ ++#include ++ ++// CHECK-LABEL: @xvfrecipe_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecipe.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void ++// ++v8f32 xvfrecipe_s(v8f32 _1) { return __lasx_xvfrecipe_s(_1); } ++// CHECK-LABEL: @xvfrecipe_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecipe.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void ++// ++v4f64 xvfrecipe_d(v4f64 _1) { return __lasx_xvfrecipe_d(_1); } ++// CHECK-LABEL: @xvfrsqrte_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrte.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void ++// ++v8f32 xvfrsqrte_s(v8f32 _1) { return __lasx_xvfrsqrte_s(_1); } ++// CHECK-LABEL: @xvfrsqrte_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrte.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void ++// ++v4f64 xvfrsqrte_d(v4f64 _1) { return __lasx_xvfrsqrte_d(_1); } +diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-approximate.c b/clang/test/CodeGen/LoongArch/lasx/builtin-approximate.c +new file mode 100644 +index 000000000000..63e9ba639ea2 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/lasx/builtin-approximate.c +@@ -0,0 +1,38 @@ ++// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s ++ ++typedef float v8f32 __attribute__((vector_size(32), aligned(32))); ++typedef double v4f64 __attribute__((vector_size(32), aligned(32))); ++ ++// CHECK-LABEL: @xvfrecipe_s ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecipe.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void ++// ++v8f32 xvfrecipe_s(v8f32 _1) { return __builtin_lasx_xvfrecipe_s(_1); 
} ++// CHECK-LABEL: @xvfrecipe_d ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecipe.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void ++// ++v4f64 xvfrecipe_d(v4f64 _1) { return __builtin_lasx_xvfrecipe_d(_1); } ++// CHECK-LABEL: @xvfrsqrte_s ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrte.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void ++// ++v8f32 xvfrsqrte_s(v8f32 _1) { return __builtin_lasx_xvfrsqrte_s(_1); } ++// CHECK-LABEL: @xvfrsqrte_d ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrte.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void ++// ++v4f64 xvfrsqrte_d(v4f64 _1) { return __builtin_lasx_xvfrsqrte_d(_1); } +diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-approximate-alias.c b/clang/test/CodeGen/LoongArch/lsx/builtin-approximate-alias.c +new file mode 100644 +index 000000000000..f26f032c878e +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/lsx/builtin-approximate-alias.c +@@ -0,0 +1,37 @@ ++// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s ++ ++#include ++ ++// CHECK-LABEL: @vfrecipe_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecipe.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] ++// ++v4f32 vfrecipe_s(v4f32 _1) { return __lsx_vfrecipe_s(_1); } ++// CHECK-LABEL: @vfrecipe_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecipe.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] ++// ++v2f64 vfrecipe_d(v2f64 _1) { return __lsx_vfrecipe_d(_1); } ++// CHECK-LABEL: @vfrsqrte_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrte.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] ++// ++v4f32 vfrsqrte_s(v4f32 _1) { return __lsx_vfrsqrte_s(_1); } ++// CHECK-LABEL: @vfrsqrte_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrte.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] ++// ++v2f64 vfrsqrte_d(v2f64 _1) { return 
__lsx_vfrsqrte_d(_1); } +diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-approximate.c b/clang/test/CodeGen/LoongArch/lsx/builtin-approximate.c +new file mode 100644 +index 000000000000..39fa1663db34 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/lsx/builtin-approximate.c +@@ -0,0 +1,38 @@ ++// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s ++ ++typedef float v4f32 __attribute__ ((vector_size(16), aligned(16))); ++typedef double v2f64 __attribute__ ((vector_size(16), aligned(16))); ++ ++// CHECK-LABEL: @vfrecipe_s ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecipe.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] ++// ++v4f32 vfrecipe_s (v4f32 _1) { return __builtin_lsx_vfrecipe_s (_1); } ++// CHECK-LABEL: @vfrecipe_d ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecipe.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] ++// ++v2f64 vfrecipe_d (v2f64 _1) { return __builtin_lsx_vfrecipe_d (_1); } ++// CHECK-LABEL: @vfrsqrte_s ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrte.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] ++// ++v4f32 vfrsqrte_s (v4f32 _1) { return __builtin_lsx_vfrsqrte_s (_1); } ++// CHECK-LABEL: @vfrsqrte_d ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrte.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] ++// ++v2f64 vfrsqrte_d (v2f64 _1) { return __builtin_lsx_vfrsqrte_d (_1); } +diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td +index 685deaec7709..9002076e7aec 100644 +--- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td ++++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td +@@ -122,6 +122,15 @@ def int_loongarch_lddir_d : BaseInt<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + [ImmArg>]>; + def int_loongarch_ldpte_d : BaseInt<[], [llvm_i64_ty, llvm_i64_ty], + [ImmArg>]>; ++ ++def int_loongarch_frecipe_s : BaseInt<[llvm_float_ty], [llvm_float_ty], ++ [IntrNoMem]>; ++def int_loongarch_frecipe_d : BaseInt<[llvm_double_ty], [llvm_double_ty], ++ [IntrNoMem]>; ++def int_loongarch_frsqrte_s : BaseInt<[llvm_float_ty], [llvm_float_ty], ++ [IntrNoMem]>; ++def int_loongarch_frsqrte_d : BaseInt<[llvm_double_ty], [llvm_double_ty], ++ [IntrNoMem]>; + } // TargetPrefix = "loongarch" + + /// Vector intrinsic +@@ -527,10 +536,12 @@ foreach inst = ["vfmadd_d", "vfmsub_d", "vfnmadd_d", "vfnmsub_d"] in + [IntrNoMem]>; + + foreach inst = ["vflogb_s", "vfsqrt_s", "vfrecip_s", "vfrsqrt_s", "vfrint_s", ++ "vfrecipe_s", "vfrsqrte_s", + "vfrintrne_s", "vfrintrz_s", "vfrintrp_s", "vfrintrm_s"] in + def int_loongarch_lsx_#inst : 
VecInt<[llvm_v4f32_ty], [llvm_v4f32_ty], + [IntrNoMem]>; + foreach inst = ["vflogb_d", "vfsqrt_d", "vfrecip_d", "vfrsqrt_d", "vfrint_d", ++ "vfrecipe_d", "vfrsqrte_d", + "vfrintrne_d", "vfrintrz_d", "vfrintrp_d", "vfrintrm_d"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v2f64_ty], [llvm_v2f64_ty], + [IntrNoMem]>; +@@ -1044,10 +1055,12 @@ foreach inst = ["xvfmadd_d", "xvfmsub_d", "xvfnmadd_d", "xvfnmsub_d"] in + [IntrNoMem]>; + + foreach inst = ["xvflogb_s", "xvfsqrt_s", "xvfrecip_s", "xvfrsqrt_s", "xvfrint_s", ++ "xvfrecipe_s", "xvfrsqrte_s", + "xvfrintrne_s", "xvfrintrz_s", "xvfrintrp_s", "xvfrintrm_s"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], [llvm_v8f32_ty], + [IntrNoMem]>; + foreach inst = ["xvflogb_d", "xvfsqrt_d", "xvfrecip_d", "xvfrsqrt_d", "xvfrint_d", ++ "xvfrecipe_d", "xvfrsqrte_d", + "xvfrintrne_d", "xvfrintrz_d", "xvfrintrp_d", "xvfrintrm_d"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v4f64_ty], [llvm_v4f64_ty], + [IntrNoMem]>; +diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td +index 2a4c991a43b0..5573e5415d26 100644 +--- a/llvm/lib/Target/LoongArch/LoongArch.td ++++ b/llvm/lib/Target/LoongArch/LoongArch.td +@@ -110,6 +110,13 @@ def FeatureAutoVec + : SubtargetFeature<"auto-vec", "HasExpAutoVec", "true", + "Experimental auto vectorization">; + ++// Floating point approximation operation ++def FeatureFrecipe ++ : SubtargetFeature<"frecipe", "HasFrecipe", "true", ++ "Support frecipe.{s/d} and frsqrte.{s/d} instructions.">; ++def HasFrecipe : Predicate<"Subtarget->hasFrecipe()">; ++ ++ + //===----------------------------------------------------------------------===// + // Registers, instruction descriptions ... + //===----------------------------------------------------------------------===// +diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td +index f30837912e75..e27896768818 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td +@@ -281,6 +281,12 @@ def : Pat<(loongarch_ftint FPR32:$src), (FTINTRZ_W_S FPR32:$src)>; + // FP reciprocal operation + def : Pat<(fdiv fpimm1, FPR32:$src), (FRECIP_S $src)>; + ++let Predicates = [HasFrecipe] in { ++// FP approximate reciprocal operation ++def : Pat<(int_loongarch_frecipe_s FPR32:$src), (FRECIPE_S FPR32:$src)>; ++def : Pat<(int_loongarch_frsqrte_s FPR32:$src), (FRSQRTE_S FPR32:$src)>; ++} ++ + // fmadd.s: fj * fk + fa + def : Pat<(fma FPR32:$fj, FPR32:$fk, FPR32:$fa), (FMADD_S $fj, $fk, $fa)>; + +diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td +index 0ea4c564b045..26bed67ac222 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td +@@ -242,6 +242,12 @@ def : Pat<(f64 (fpextend FPR32:$src)), (FCVT_D_S FPR32:$src)>; + // FP reciprocal operation + def : Pat<(fdiv fpimm1, FPR64:$src), (FRECIP_D $src)>; + ++let Predicates = [HasFrecipe] in { ++// FP approximate reciprocal operation ++def : Pat<(int_loongarch_frecipe_d FPR64:$src), (FRECIPE_D FPR64:$src)>; ++def : Pat<(int_loongarch_frsqrte_d FPR64:$src), (FRSQRTE_D FPR64:$src)>; ++} ++ + // fmadd.d: fj * fk + fa + def : Pat<(fma FPR64:$fj, FPR64:$fk, FPR64:$fa), (FMADD_D $fj, $fk, $fa)>; + +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index 
454915ac8c0a..6f1969bf8cae 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -2080,6 +2080,16 @@ foreach Inst = ["XVFLOGB_D", "XVFCLASS_D", "XVFSQRT_D", "XVFRECIP_D", "XVFRSQRT_ + def : Pat<(deriveLASXIntrinsic<Inst>.ret (v4f64 LASX256:$xj)), + (!cast<LAInst>(Inst) LASX256:$xj)>; + ++// 256-Bit vector FP approximate reciprocal operation ++let Predicates = [HasFrecipe] in { ++foreach Inst = ["XVFRECIPE_S", "XVFRSQRTE_S"] in ++ def : Pat<(deriveLASXIntrinsic<Inst>.ret (v8f32 LASX256:$xj)), ++ (!cast<LAInst>(Inst) LASX256:$xj)>; ++foreach Inst = ["XVFRECIPE_D", "XVFRSQRTE_D"] in ++ def : Pat<(deriveLASXIntrinsic<Inst>.ret (v4f64 LASX256:$xj)), ++ (!cast<LAInst>(Inst) LASX256:$xj)>; ++} ++ + def : Pat<(int_loongarch_lasx_xvpickve_w_f v8f32:$xj, timm:$imm), + (XVPICKVE_W v8f32:$xj, (to_valid_timm timm:$imm))>; + def : Pat<(int_loongarch_lasx_xvpickve_d_f v4f64:$xj, timm:$imm), +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index 6d60d7074ec3..0580683c3ce3 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -2195,6 +2195,16 @@ foreach Inst = ["VFLOGB_D", "VFCLASS_D", "VFSQRT_D", "VFRECIP_D", "VFRSQRT_D", + def : Pat<(deriveLSXIntrinsic<Inst>.ret (v2f64 LSX128:$vj)), + (!cast<LAInst>(Inst) LSX128:$vj)>; + ++// 128-Bit vector FP approximate reciprocal operation ++let Predicates = [HasFrecipe] in { ++foreach Inst = ["VFRECIPE_S", "VFRSQRTE_S"] in ++ def : Pat<(deriveLSXIntrinsic<Inst>.ret (v4f32 LSX128:$vj)), ++ (!cast<LAInst>(Inst) LSX128:$vj)>; ++foreach Inst = ["VFRECIPE_D", "VFRSQRTE_D"] in ++ def : Pat<(deriveLSXIntrinsic<Inst>.ret (v2f64 LSX128:$vj)), ++ (!cast<LAInst>(Inst) LSX128:$vj)>; ++} ++ + // load + def : Pat<(int_loongarch_lsx_vld GPR:$rj, timm:$imm), + (VLD GPR:$rj, (to_valid_timm timm:$imm))>; +diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h +index 174e4cba8326..11c0b39e176e 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h ++++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h +@@ -45,6 +45,7 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo { + bool HasUAL = false; + bool HasLinkerRelax = false; + bool HasExpAutoVec = false; ++ bool HasFrecipe = false; + unsigned GRLen = 32; + MVT GRLenVT = MVT::i32; + LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown; +@@ -104,6 +105,7 @@ public: + bool hasUAL() const { return HasUAL; } + bool hasLinkerRelax() const { return HasLinkerRelax; } + bool hasExpAutoVec() const { return HasExpAutoVec; } ++ bool hasFrecipe() const { return HasFrecipe; } + MVT getGRLenVT() const { return GRLenVT; } + unsigned getGRLen() const { return GRLen; } + LoongArchABI::ABI getTargetABI() const { return TargetABI; } +diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-frecipe-dbl.ll b/llvm/test/CodeGen/LoongArch/intrinsic-frecipe-dbl.ll +new file mode 100644 +index 000000000000..9f572500caa0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/intrinsic-frecipe-dbl.ll +@@ -0,0 +1,26 @@ ++; RUN: llc --mtriple=loongarch32 --mattr=+d,+frecipe < %s | FileCheck %s ++; RUN: llc --mtriple=loongarch64 --mattr=+d,+frecipe < %s | FileCheck %s ++ ++declare double @llvm.loongarch.frecipe.d(double) ++ ++define double @frecipe_d(double %a) { ++; CHECK-LABEL: frecipe_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: frecipe.d $fa0, $fa0 ++; CHECK-NEXT: ret ++entry: ++ %res = call double @llvm.loongarch.frecipe.d(double %a) ++ ret double %res ++} ++ ++declare 
double @llvm.loongarch.frsqrte.d(double) ++ ++define double @frsqrte_d(double %a) { ++; CHECK-LABEL: frsqrte_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: frsqrte.d $fa0, $fa0 ++; CHECK-NEXT: ret ++entry: ++ %res = call double @llvm.loongarch.frsqrte.d(double %a) ++ ret double %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-frecipe-flt.ll b/llvm/test/CodeGen/LoongArch/intrinsic-frecipe-flt.ll +new file mode 100644 +index 000000000000..0b2029f2e44a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/intrinsic-frecipe-flt.ll +@@ -0,0 +1,26 @@ ++; RUN: llc --mtriple=loongarch32 --mattr=+f,+frecipe < %s | FileCheck %s ++; RUN: llc --mtriple=loongarch64 --mattr=+f,+frecipe < %s | FileCheck %s ++ ++declare float @llvm.loongarch.frecipe.s(float) ++ ++define float @frecipe_s(float %a) { ++; CHECK-LABEL: frecipe_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: frecipe.s $fa0, $fa0 ++; CHECK-NEXT: ret ++entry: ++ %res = call float @llvm.loongarch.frecipe.s(float %a) ++ ret float %res ++} ++ ++declare float @llvm.loongarch.frsqrte.s(float) ++ ++define float @frsqrte_s(float %a) { ++; CHECK-LABEL: frsqrte_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: frsqrte.s $fa0, $fa0 ++; CHECK-NEXT: ret ++entry: ++ %res = call float @llvm.loongarch.frsqrte.s(float %a) ++ ret float %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecipe.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecipe.ll +new file mode 100644 +index 000000000000..215436823af8 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecipe.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx,+frecipe < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfrecipe.s(<8 x float>) ++ ++define <8 x float> @lasx_xvfrecipe_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvfrecipe_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrecipe.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfrecipe.s(<8 x float> %va) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfrecipe.d(<4 x double>) ++ ++define <4 x double> @lasx_xvfrecipe_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvfrecipe_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrecipe.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfrecipe.d(<4 x double> %va) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrte.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrte.ll +new file mode 100644 +index 000000000000..ad36c3aa5c29 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrte.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx,+frecipe < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfrsqrte.s(<8 x float>) ++ ++define <8 x float> @lasx_xvfrsqrte_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvfrsqrte_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrsqrte.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfrsqrte.s(<8 x float> %va) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfrsqrte.d(<4 x double>) ++ ++define <4 x double> @lasx_xvfrsqrte_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvfrsqrte_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrsqrte.d $xr0, $xr0 
++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfrsqrte.d(<4 x double> %va) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecipe.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecipe.ll +new file mode 100644 +index 000000000000..1b7a97d9f972 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecipe.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx,+frecipe < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfrecipe.s(<4 x float>) ++ ++define <4 x float> @lsx_vfrecipe_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vfrecipe_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrecipe.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfrecipe.s(<4 x float> %va) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfrecipe.d(<2 x double>) ++ ++define <2 x double> @lsx_vfrecipe_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vfrecipe_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrecipe.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfrecipe.d(<2 x double> %va) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrte.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrte.ll +new file mode 100644 +index 000000000000..3cd6c78e87d7 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrte.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx,+frecipe < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfrsqrte.s(<4 x float>) ++ ++define <4 x float> @lsx_vfrsqrte_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vfrsqrte_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrsqrte.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfrsqrte.s(<4 x float> %va) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfrsqrte.d(<2 x double>) ++ ++define <2 x double> @lsx_vfrsqrte_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vfrsqrte_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrsqrte.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfrsqrte.d(<2 x double> %va) ++ ret <2 x double> %res ++} +-- +2.20.1 + diff --git a/0019-LoongArch-MC-Support-to-get-the-FixupKind-for-BL-729.patch b/0019-LoongArch-MC-Support-to-get-the-FixupKind-for-BL-729.patch new file mode 100644 index 0000000..7f9290f --- /dev/null +++ b/0019-LoongArch-MC-Support-to-get-the-FixupKind-for-BL-729.patch @@ -0,0 +1,53 @@ +From ca4406226dec4a681a51aaee06dc1d6ba132c8a5 Mon Sep 17 00:00:00 2001 +From: ZhaoQi +Date: Tue, 21 Nov 2023 19:00:29 +0800 +Subject: [PATCH 19/27] [LoongArch][MC] Support to get the FixupKind for BL + (#72938) + +Previously, bolt could not get FixupKind for BL correctly, because bolt +cannot get target-flags for BL. Here just add support in MCCodeEmitter. + +Fixes https://github.com/llvm/llvm-project/pull/72826. 
+ +(cherry picked from commit 775d2f3201cf7fb657aaf58d1b37c130bd9eb8f9) +--- + .../LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp | 1 + + llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir | 8 ++------ + 2 files changed, 3 insertions(+), 6 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp +index 08c0820cb862..09d92ac9aa3a 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp +@@ -263,6 +263,7 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO, + FixupKind = LoongArch::fixup_loongarch_b21; + break; + case LoongArch::B: ++ case LoongArch::BL: + FixupKind = LoongArch::fixup_loongarch_b26; + break; + } +diff --git a/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir b/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir +index 2c1d41be7711..70cd5fb8d7eb 100644 +--- a/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir ++++ b/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir +@@ -1,14 +1,10 @@ +-## Tagged as "Expectedly Failed" until the following patch fix it +-# XFAIL: * + # RUN: llc --mtriple=loongarch64 --filetype=obj %s -o - | \ + # RUN: llvm-objdump -d - | FileCheck %s + + # REQUIRES: asserts + +-## Check that bl can get fixupkind correctly. +-## When BL has target-flags(loongarch-call), there is no error. But without +-## this flag, an assertion error will appear: +-## Assertion `FixupKind != LoongArch::fixup_loongarch_invalid && "Unhandled expression!"' failed. ++## Check that bl can get fixupkind correctly, whether BL contains ++## target-flags(loongarch-call) or not. + + --- | + target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" +-- +2.20.1 + diff --git a/0020-LoongArch-Add-some-binary-IR-instructions-testcases-.patch b/0020-LoongArch-Add-some-binary-IR-instructions-testcases-.patch new file mode 100644 index 0000000..9bbc715 --- /dev/null +++ b/0020-LoongArch-Add-some-binary-IR-instructions-testcases-.patch @@ -0,0 +1,1526 @@ +From cb9eb018760fee3f3767b4b5dc0f56ef7afd7d1c Mon Sep 17 00:00:00 2001 +From: leecheechen +Date: Thu, 30 Nov 2023 21:41:18 +0800 +Subject: [PATCH 20/42] [LoongArch] Add some binary IR instructions testcases + for LSX (#73929) + +The IR instructions include: +- Binary Operations: add fadd sub fsub mul fmul udiv sdiv fdiv +- Bitwise Binary Operations: shl lshr ashr + +(cherry picked from commit 29a0f3ec2b47630ce229953fe7250e741b6c10b6) + +--- + .../LoongArch/lsx/ir-instruction/add.ll | 122 +++++++++ + .../LoongArch/lsx/ir-instruction/ashr.ll | 178 +++++++++++++ + .../LoongArch/lsx/ir-instruction/fadd.ll | 34 +++ + .../LoongArch/lsx/ir-instruction/fdiv.ll | 34 +++ + .../LoongArch/lsx/ir-instruction/fmul.ll | 34 +++ + .../LoongArch/lsx/ir-instruction/fsub.ll | 34 +++ + .../LoongArch/lsx/ir-instruction/lshr.ll | 178 +++++++++++++ + .../LoongArch/lsx/ir-instruction/mul.ll | 242 ++++++++++++++++++ + .../LoongArch/lsx/ir-instruction/sdiv.ll | 134 ++++++++++ + .../LoongArch/lsx/ir-instruction/shl.ll | 178 +++++++++++++ + .../LoongArch/lsx/ir-instruction/sub.ll | 122 +++++++++ + .../LoongArch/lsx/ir-instruction/udiv.ll | 122 +++++++++ + 12 files changed, 1412 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll + create mode 100644 
llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll + +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll +new file mode 100644 +index 000000000000..2a7c37c2ae34 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll +@@ -0,0 +1,122 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @add_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: add_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vadd.b $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %v2 = add <16 x i8> %v0, %v1 ++ store <16 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @add_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: add_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vadd.h $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %v2 = add <8 x i16> %v0, %v1 ++ store <8 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @add_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: add_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vadd.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %v2 = add <4 x i32> %v0, %v1 ++ store <4 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @add_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: add_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vadd.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %v2 = add <2 x i64> %v0, %v1 ++ store <2 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @add_v16i8_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: add_v16i8_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vaddi.bu $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = add <16 x i8> %v0, ++ store <16 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @add_v8i16_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: add_v8i16_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vaddi.hu $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = add <8 x i16> %v0, ++ store 
<8 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @add_v4i32_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: add_v4i32_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vaddi.wu $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = add <4 x i32> %v0, ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @add_v2i64_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: add_v2i64_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vaddi.du $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = add <2 x i64> %v0, ++ store <2 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll +new file mode 100644 +index 000000000000..fbc570d77ba8 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll +@@ -0,0 +1,178 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @ashr_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: ashr_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsra.b $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %v2 = ashr <16 x i8> %v0, %v1 ++ store <16 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @ashr_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: ashr_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsra.h $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %v2 = ashr <8 x i16> %v0, %v1 ++ store <8 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @ashr_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: ashr_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsra.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %v2 = ashr <4 x i32> %v0, %v1 ++ store <4 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @ashr_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: ashr_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsra.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %v2 = ashr <2 x i64> %v0, %v1 ++ store <2 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @ashr_v16i8_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: ashr_v16i8_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrai.b $vr0, $vr0, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = ashr <16 x i8> %v0, ++ store <16 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @ashr_v16i8_7(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: ashr_v16i8_7: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrai.b $vr0, $vr0, 7 ++; 
CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = ashr <16 x i8> %v0, ++ store <16 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @ashr_v8i16_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: ashr_v8i16_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrai.h $vr0, $vr0, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = ashr <8 x i16> %v0, ++ store <8 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @ashr_v8i16_15(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: ashr_v8i16_15: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrai.h $vr0, $vr0, 15 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = ashr <8 x i16> %v0, ++ store <8 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @ashr_v4i32_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: ashr_v4i32_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrai.w $vr0, $vr0, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = ashr <4 x i32> %v0, ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @ashr_v4i32_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: ashr_v4i32_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrai.w $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = ashr <4 x i32> %v0, ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @ashr_v2i64_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: ashr_v2i64_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrai.d $vr0, $vr0, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = ashr <2 x i64> %v0, ++ store <2 x i64> %v1, ptr %res ++ ret void ++} ++ ++define void @ashr_v2i64_63(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: ashr_v2i64_63: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrai.d $vr0, $vr0, 63 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = ashr <2 x i64> %v0, ++ store <2 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll +new file mode 100644 +index 000000000000..1fa1f611c4a3 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll +@@ -0,0 +1,34 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @fadd_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: fadd_v4f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfadd.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = fadd <4 x float> %v0, %v1 ++ store <4 x float> %v2, ptr %res ++ ret void ++} ++ ++define void @fadd_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: fadd_v2f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfadd.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret 
++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = fadd <2 x double> %v0, %v1 ++ store <2 x double> %v2, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll +new file mode 100644 +index 000000000000..eb7c8bd9616e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll +@@ -0,0 +1,34 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @fdiv_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: fdiv_v4f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfdiv.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = fdiv <4 x float> %v0, %v1 ++ store <4 x float> %v2, ptr %res ++ ret void ++} ++ ++define void @fdiv_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: fdiv_v2f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfdiv.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = fdiv <2 x double> %v0, %v1 ++ store <2 x double> %v2, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll +new file mode 100644 +index 000000000000..e7fb527f7805 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll +@@ -0,0 +1,34 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @fmul_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: fmul_v4f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfmul.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = fmul <4 x float> %v0, %v1 ++ store <4 x float> %v2, ptr %res ++ ret void ++} ++ ++define void @fmul_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: fmul_v2f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfmul.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = fmul <2 x double> %v0, %v1 ++ store <2 x double> %v2, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll +new file mode 100644 +index 000000000000..df98182321da +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll +@@ -0,0 +1,34 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @fsub_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: fsub_v4f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfsub.s $vr0, $vr1, $vr0 
++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = fsub <4 x float> %v0, %v1 ++ store <4 x float> %v2, ptr %res ++ ret void ++} ++ ++define void @fsub_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: fsub_v2f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfsub.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = fsub <2 x double> %v0, %v1 ++ store <2 x double> %v2, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll +new file mode 100644 +index 000000000000..dada52f93060 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll +@@ -0,0 +1,178 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @lshr_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: lshr_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsrl.b $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %v2 = lshr <16 x i8> %v0, %v1 ++ store <16 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @lshr_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: lshr_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsrl.h $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %v2 = lshr <8 x i16> %v0, %v1 ++ store <8 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @lshr_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: lshr_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsrl.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %v2 = lshr <4 x i32> %v0, %v1 ++ store <4 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @lshr_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: lshr_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsrl.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %v2 = lshr <2 x i64> %v0, %v1 ++ store <2 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @lshr_v16i8_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: lshr_v16i8_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrli.b $vr0, $vr0, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = lshr <16 x i8> %v0, ++ store <16 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @lshr_v16i8_7(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: lshr_v16i8_7: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrli.b $vr0, $vr0, 7 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = lshr 
<16 x i8> %v0, ++ store <16 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @lshr_v8i16_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: lshr_v8i16_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrli.h $vr0, $vr0, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = lshr <8 x i16> %v0, ++ store <8 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @lshr_v8i16_15(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: lshr_v8i16_15: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrli.h $vr0, $vr0, 15 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = lshr <8 x i16> %v0, ++ store <8 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @lshr_v4i32_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: lshr_v4i32_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrli.w $vr0, $vr0, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = lshr <4 x i32> %v0, ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @lshr_v4i32_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: lshr_v4i32_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrli.w $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = lshr <4 x i32> %v0, ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @lshr_v2i64_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: lshr_v2i64_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrli.d $vr0, $vr0, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = lshr <2 x i64> %v0, ++ store <2 x i64> %v1, ptr %res ++ ret void ++} ++ ++define void @lshr_v2i64_63(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: lshr_v2i64_63: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrli.d $vr0, $vr0, 63 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = lshr <2 x i64> %v0, ++ store <2 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll +new file mode 100644 +index 000000000000..5060240cd8b1 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll +@@ -0,0 +1,242 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @mul_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mul_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vmul.b $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %v2 = mul <16 x i8> %v0, %v1 ++ store <16 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @mul_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mul_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vmul.h $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %v2 = mul <8 x i16> %v0, %v1 ++ store <8 x 
i16> %v2, ptr %res ++ ret void ++} ++ ++define void @mul_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mul_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vmul.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %v2 = mul <4 x i32> %v0, %v1 ++ store <4 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @mul_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mul_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vmul.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %v2 = mul <2 x i64> %v0, %v1 ++ store <2 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @mul_square_v16i8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_square_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vmul.b $vr0, $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = mul <16 x i8> %v0, %v0 ++ store <16 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_square_v8i16(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_square_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vmul.h $vr0, $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = mul <8 x i16> %v0, %v0 ++ store <8 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_square_v4i32(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_square_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vmul.w $vr0, $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = mul <4 x i32> %v0, %v0 ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_square_v2i64(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_square_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vmul.d $vr0, $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = mul <2 x i64> %v0, %v0 ++ store <2 x i64> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_v16i8_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_v16i8_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslli.b $vr0, $vr0, 3 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = mul <16 x i8> %v0, ++ store <16 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_v8i16_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_v8i16_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslli.h $vr0, $vr0, 3 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = mul <8 x i16> %v0, ++ store <8 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_v4i32_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_v4i32_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslli.w $vr0, $vr0, 3 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = mul <4 x i32> %v0, ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_v2i64_8(ptr %res, ptr 
%a0) nounwind { ++; CHECK-LABEL: mul_v2i64_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslli.d $vr0, $vr0, 3 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = mul <2 x i64> %v0, ++ store <2 x i64> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_v16i8_17(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_v16i8_17: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: ori $a2, $zero, 17 ++; CHECK-NEXT: vreplgr2vr.b $vr0, $a2 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vmul.b $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = mul <16 x i8> %v0, ++ store <16 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_v8i16_17(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_v8i16_17: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: ori $a2, $zero, 17 ++; CHECK-NEXT: vreplgr2vr.h $vr0, $a2 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vmul.h $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = mul <8 x i16> %v0, ++ store <8 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_v4i32_17(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_v4i32_17: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: ori $a2, $zero, 17 ++; CHECK-NEXT: vreplgr2vr.w $vr0, $a2 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vmul.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = mul <4 x i32> %v0, ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_v2i64_17(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_v2i64_17: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: ori $a2, $zero, 17 ++; CHECK-NEXT: vreplgr2vr.d $vr0, $a2 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vmul.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = mul <2 x i64> %v0, ++ store <2 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll +new file mode 100644 +index 000000000000..b68f73a74913 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll +@@ -0,0 +1,134 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @sdiv_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: sdiv_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vdiv.b $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %v2 = sdiv <16 x i8> %v0, %v1 ++ store <16 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @sdiv_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: sdiv_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vdiv.h $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %v2 = sdiv <8 x i16> %v0, %v1 ++ store <8 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @sdiv_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: sdiv_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: 
vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vdiv.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %v2 = sdiv <4 x i32> %v0, %v1 ++ store <4 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @sdiv_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: sdiv_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vdiv.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %v2 = sdiv <2 x i64> %v0, %v1 ++ store <2 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @sdiv_v16i8_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sdiv_v16i8_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrai.b $vr1, $vr0, 7 ++; CHECK-NEXT: vsrli.b $vr1, $vr1, 5 ++; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: vsrai.b $vr0, $vr0, 3 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = sdiv <16 x i8> %v0, ++ store <16 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @sdiv_v8i16_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sdiv_v8i16_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrai.h $vr1, $vr0, 15 ++; CHECK-NEXT: vsrli.h $vr1, $vr1, 13 ++; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: vsrai.h $vr0, $vr0, 3 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = sdiv <8 x i16> %v0, ++ store <8 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @sdiv_v4i32_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sdiv_v4i32_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrai.w $vr1, $vr0, 31 ++; CHECK-NEXT: vsrli.w $vr1, $vr1, 29 ++; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: vsrai.w $vr0, $vr0, 3 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = sdiv <4 x i32> %v0, ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @sdiv_v2i64_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sdiv_v2i64_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrai.d $vr1, $vr0, 63 ++; CHECK-NEXT: vsrli.d $vr1, $vr1, 61 ++; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: vsrai.d $vr0, $vr0, 3 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = sdiv <2 x i64> %v0, ++ store <2 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll +new file mode 100644 +index 000000000000..fa0aebaf28b3 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll +@@ -0,0 +1,178 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @shl_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: shl_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsll.b $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %v2 = shl <16 x i8> %v0, %v1 ++ store <16 x i8> %v2, ptr %res ++ ret void 
++} ++ ++define void @shl_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: shl_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsll.h $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %v2 = shl <8 x i16> %v0, %v1 ++ store <8 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @shl_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: shl_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsll.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %v2 = shl <4 x i32> %v0, %v1 ++ store <4 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @shl_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: shl_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsll.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %v2 = shl <2 x i64> %v0, %v1 ++ store <2 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @shl_v16i8_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: shl_v16i8_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslli.b $vr0, $vr0, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = shl <16 x i8> %v0, ++ store <16 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @shl_v16i8_7(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: shl_v16i8_7: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslli.b $vr0, $vr0, 7 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = shl <16 x i8> %v0, ++ store <16 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @shl_v8i16_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: shl_v8i16_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslli.h $vr0, $vr0, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = shl <8 x i16> %v0, ++ store <8 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @shl_v8i16_15(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: shl_v8i16_15: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslli.h $vr0, $vr0, 15 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = shl <8 x i16> %v0, ++ store <8 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @shl_v4i32_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: shl_v4i32_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslli.w $vr0, $vr0, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = shl <4 x i32> %v0, ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @shl_v4i32_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: shl_v4i32_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslli.w $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = shl <4 x i32> %v0, ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @shl_v2i64_1(ptr %res, ptr %a0) 
nounwind { ++; CHECK-LABEL: shl_v2i64_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslli.d $vr0, $vr0, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = shl <2 x i64> %v0, ++ store <2 x i64> %v1, ptr %res ++ ret void ++} ++ ++define void @shl_v2i64_63(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: shl_v2i64_63: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslli.d $vr0, $vr0, 63 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = shl <2 x i64> %v0, ++ store <2 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll +new file mode 100644 +index 000000000000..25b4623a47d1 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll +@@ -0,0 +1,122 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @sub_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: sub_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsub.b $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %v2 = sub <16 x i8> %v0, %v1 ++ store <16 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @sub_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: sub_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsub.h $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %v2 = sub <8 x i16> %v0, %v1 ++ store <8 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @sub_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: sub_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsub.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %v2 = sub <4 x i32> %v0, %v1 ++ store <4 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @sub_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: sub_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsub.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %v2 = sub <2 x i64> %v0, %v1 ++ store <2 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @sub_v16i8_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sub_v16i8_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsubi.bu $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = sub <16 x i8> %v0, ++ store <16 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @sub_v8i16_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sub_v8i16_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsubi.hu $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = sub <8 x 
i16> %v0, ++ store <8 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @sub_v4i32_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sub_v4i32_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsubi.wu $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = sub <4 x i32> %v0, ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @sub_v2i64_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sub_v2i64_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsubi.du $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = sub <2 x i64> %v0, ++ store <2 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll +new file mode 100644 +index 000000000000..abb60b91dd48 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll +@@ -0,0 +1,122 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @udiv_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: udiv_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vdiv.bu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %v2 = udiv <16 x i8> %v0, %v1 ++ store <16 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @udiv_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: udiv_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vdiv.hu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %v2 = udiv <8 x i16> %v0, %v1 ++ store <8 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @udiv_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: udiv_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vdiv.wu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %v2 = udiv <4 x i32> %v0, %v1 ++ store <4 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @udiv_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: udiv_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vdiv.du $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %v2 = udiv <2 x i64> %v0, %v1 ++ store <2 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @udiv_v16i8_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: udiv_v16i8_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrli.b $vr0, $vr0, 3 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = udiv <16 x i8> %v0, ++ store <16 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @udiv_v8i16_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: udiv_v8i16_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrli.h 
$vr0, $vr0, 3 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = udiv <8 x i16> %v0, ++ store <8 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @udiv_v4i32_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: udiv_v4i32_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrli.w $vr0, $vr0, 3 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = udiv <4 x i32> %v0, ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @udiv_v2i64_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: udiv_v2i64_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrli.d $vr0, $vr0, 3 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = udiv <2 x i64> %v0, ++ store <2 x i64> %v1, ptr %res ++ ret void ++} +-- +2.20.1 + diff --git a/0020-LoongArch-MC-Modify-branch-evaluation-for-MCInstrAna.patch b/0020-LoongArch-MC-Modify-branch-evaluation-for-MCInstrAna.patch new file mode 100644 index 0000000..a8074e8 --- /dev/null +++ b/0020-LoongArch-MC-Modify-branch-evaluation-for-MCInstrAna.patch @@ -0,0 +1,134 @@ +From 12275717d661ac4692edaaa48da15622e27493ca Mon Sep 17 00:00:00 2001 +From: ZhaoQi +Date: Thu, 23 Nov 2023 16:38:41 +0800 +Subject: [PATCH 20/27] [LoongArch][MC] Modify branch evaluation for + MCInstrAnalysis (#73205) + +Function evaluateBranch() is used to compute the target address for a given +branch instruction and return true on success. But the target address of an +indirect branch cannot be computed by simply adding the immediate, so rule it out and just return +false. + +This patch also adds objdump tests which capture the current state of +support for printing branch targets. Without this patch, the result of +"jirl $zero, $a0, 4" is "jirl $zero, $a0, 4 ". It is obviously +incorrect, because this instruction represents an indirect branch whose +target address depends on both the register value and the immediate. After +this patch, the target is printed correctly, albeit with less detail.
+ +(cherry picked from commit 1c68c4c57a65a67963264878bc4646be8b58854c) +--- + .../MCTargetDesc/LoongArchMCTargetDesc.cpp | 3 +- + .../llvm-objdump/ELF/LoongArch/branches.s | 76 +++++++++++++++++++ + .../llvm-objdump/ELF/LoongArch/lit.local.cfg | 2 + + 3 files changed, 80 insertions(+), 1 deletion(-) + create mode 100644 llvm/test/tools/llvm-objdump/ELF/LoongArch/branches.s + create mode 100644 llvm/test/tools/llvm-objdump/ELF/LoongArch/lit.local.cfg + +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp +index d580c3457fec..a4e6a09863e6 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp +@@ -97,7 +97,8 @@ public: + bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, + uint64_t &Target) const override { + unsigned NumOps = Inst.getNumOperands(); +- if (isBranch(Inst) || Inst.getOpcode() == LoongArch::BL) { ++ if ((isBranch(Inst) && !isIndirectBranch(Inst)) || ++ Inst.getOpcode() == LoongArch::BL) { + Target = Addr + Inst.getOperand(NumOps - 1).getImm(); + return true; + } +diff --git a/llvm/test/tools/llvm-objdump/ELF/LoongArch/branches.s b/llvm/test/tools/llvm-objdump/ELF/LoongArch/branches.s +new file mode 100644 +index 000000000000..8cb00aef9954 +--- /dev/null ++++ b/llvm/test/tools/llvm-objdump/ELF/LoongArch/branches.s +@@ -0,0 +1,76 @@ ++# RUN: llvm-mc --triple=loongarch32 --filetype=obj < %s | \ ++# RUN: llvm-objdump -d --no-show-raw-insn - | FileCheck %s ++# RUN: llvm-mc --triple=loongarch64 --filetype=obj < %s | \ ++# RUN: llvm-objdump -d --no-show-raw-insn - | FileCheck %s ++ ++# CHECK-LABEL: : ++foo: ++# CHECK: beq $a0, $a1, 108 ++beq $a0, $a1, .Llocal ++# CHECK: bne $a0, $a1, 104 ++bne $a0, $a1, .Llocal ++# CHECK: blt $a0, $a1, 100 ++blt $a0, $a1, .Llocal ++# CHECK: bltu $a0, $a1, 96 ++bltu $a0, $a1, .Llocal ++# CHECK: bge $a0, $a1, 92 ++bge $a0, $a1, .Llocal ++# CHECK: bgeu $a0, $a1, 88 ++bgeu $a0, $a1, .Llocal ++# CHECK: beqz $a0, 84 ++beqz $a0, .Llocal ++# CHECK: bnez $a0, 80 ++bnez $a0, .Llocal ++# CHECK: bceqz $fcc6, 76 ++bceqz $fcc6, .Llocal ++# CHECK: bcnez $fcc6, 72 ++bcnez $fcc6, .Llocal ++ ++# CHECK: beq $a0, $a1, 76 ++beq $a0, $a1, bar ++# CHECK: bne $a0, $a1, 72 ++bne $a0, $a1, bar ++# CHECK: blt $a0, $a1, 68 ++blt $a0, $a1, bar ++# CHECK: bltu $a0, $a1, 64 ++bltu $a0, $a1, bar ++# CHECK: bge $a0, $a1, 60 ++bge $a0, $a1, bar ++# CHECK: bgeu $a0, $a1, 56 ++bgeu $a0, $a1, bar ++# CHECK: beqz $a0, 52 ++beqz $a0, bar ++# CHECK: bnez $a0, 48 ++bnez $a0, bar ++# CHECK: bceqz $fcc6, 44 ++bceqz $fcc6, bar ++# CHECK: bcnez $fcc6, 40 ++bcnez $fcc6, bar ++ ++# CHECK: b 28 ++b .Llocal ++# CHECK: b 32 ++b bar ++ ++# CHECK: bl 20 ++bl .Llocal ++# CHECK: bl 24 ++bl bar ++ ++# CHECK: jirl $zero, $a0, 4{{$}} ++jirl $zero, $a0, 4 ++# CHECK: jirl $ra, $a0, 4{{$}} ++jirl $ra, $a0, 4 ++# CHECK: ret ++ret ++ ++.Llocal: ++# CHECK: 6c: nop ++# CHECK: nop ++nop ++nop ++ ++# CHECK-LABEL: : ++bar: ++# CHECK: 74: nop ++nop +diff --git a/llvm/test/tools/llvm-objdump/ELF/LoongArch/lit.local.cfg b/llvm/test/tools/llvm-objdump/ELF/LoongArch/lit.local.cfg +new file mode 100644 +index 000000000000..cc24278acbb4 +--- /dev/null ++++ b/llvm/test/tools/llvm-objdump/ELF/LoongArch/lit.local.cfg +@@ -0,0 +1,2 @@ ++if not "LoongArch" in config.root.targets: ++ config.unsupported = True +-- +2.20.1 + diff --git a/0020-LoongArch-Support-march-la64v1.0-and-march-la64v1.1-.patch 
b/0020-LoongArch-Support-march-la64v1.0-and-march-la64v1.1-.patch new file mode 100644 index 0000000..23df372 --- /dev/null +++ b/0020-LoongArch-Support-march-la64v1.0-and-march-la64v1.1-.patch @@ -0,0 +1,240 @@ +From f6ae8f6c4084f1d2971ce4ae805c4d0af2d77396 Mon Sep 17 00:00:00 2001 +From: Ami-zhang +Date: Tue, 23 Jul 2024 14:03:28 +0800 +Subject: [PATCH 20/23] [LoongArch] Support -march=la64v1.0 and -march=la64v1.1 + (#100057) + +The newly added strings `la64v1.0` and `la64v1.1` in `-march` are as +described in the LoongArch toolchain conventions (see [1]). + +The target-cpu/feature attributes are forwarded to the compiler when +a particular `-march` parameter is specified. The default CPU `loongarch64` +is returned when the arch name is `la64v1.0` or `la64v1.1`. + +In addition, this commit adds `la64v1.0`/`la64v1.1` to +"__loongarch_arch" and adds a definition for the macro "__loongarch_frecipe". + +[1]: https://github.com/loongson/la-toolchain-conventions + +(cherry picked from commit 5a1b9896ad5a7dcd25a1cc7a4d3fd44155e4b22d) +--- + clang/lib/Basic/Targets/LoongArch.cpp | 23 +++++++++++++++- + clang/lib/Basic/Targets/LoongArch.h | 2 ++ + .../lib/Driver/ToolChains/Arch/LoongArch.cpp | 10 +++++-- + clang/test/Driver/loongarch-march.c | 22 +++++++++++++++ + clang/test/Preprocessor/init-loongarch.c | 27 ++++++++++++++++--- + .../TargetParser/LoongArchTargetParser.cpp | 11 ++++++++ + 6 files changed, 88 insertions(+), 7 deletions(-) + +diff --git a/clang/lib/Basic/Targets/LoongArch.cpp b/clang/lib/Basic/Targets/LoongArch.cpp +index 913404240916..5fede3d7cdc4 100644 +--- a/clang/lib/Basic/Targets/LoongArch.cpp ++++ b/clang/lib/Basic/Targets/LoongArch.cpp +@@ -200,7 +200,24 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts, + + // Define __loongarch_arch. + StringRef ArchName = getCPU(); +- Builder.defineMacro("__loongarch_arch", Twine('"') + ArchName + Twine('"')); ++ if (ArchName == "loongarch64") { ++ if (HasFeatureLSX) { ++ // TODO: As more features of the V1.1 ISA are supported, a unified "v1.1" ++ // arch feature set will be used to include all sub-features belonging to ++ // the V1.1 ISA version. ++ if (HasFeatureFrecipe) ++ Builder.defineMacro("__loongarch_arch", ++ Twine('"') + "la64v1.1" + Twine('"')); ++ else ++ Builder.defineMacro("__loongarch_arch", ++ Twine('"') + "la64v1.0" + Twine('"')); ++ } else { ++ Builder.defineMacro("__loongarch_arch", ++ Twine('"') + ArchName + Twine('"')); ++ } ++ } else { ++ Builder.defineMacro("__loongarch_arch", Twine('"') + ArchName + Twine('"')); ++ } + + // Define __loongarch_tune.
+ StringRef TuneCPU = getTargetOpts().TuneCPU; +@@ -216,6 +233,8 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts, + Builder.defineMacro("__loongarch_simd_width", "128"); + Builder.defineMacro("__loongarch_sx", Twine(1)); + } ++ if (HasFeatureFrecipe) ++ Builder.defineMacro("__loongarch_frecipe", Twine(1)); + + StringRef ABI = getABI(); + if (ABI == "lp64d" || ABI == "lp64f" || ABI == "lp64s") +@@ -289,6 +308,8 @@ bool LoongArchTargetInfo::handleTargetFeatures( + HasFeatureLSX = true; + else if (Feature == "+lasx") + HasFeatureLASX = true; ++ else if (Feature == "+frecipe") ++ HasFeatureFrecipe = true; + } + return true; + } +diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h +index 3313102492cb..4d2965f5b3a3 100644 +--- a/clang/lib/Basic/Targets/LoongArch.h ++++ b/clang/lib/Basic/Targets/LoongArch.h +@@ -29,6 +29,7 @@ protected: + bool HasFeatureF; + bool HasFeatureLSX; + bool HasFeatureLASX; ++ bool HasFeatureFrecipe; + + public: + LoongArchTargetInfo(const llvm::Triple &Triple, const TargetOptions &) +@@ -37,6 +38,7 @@ public: + HasFeatureF = false; + HasFeatureLSX = false; + HasFeatureLASX = false; ++ HasFeatureFrecipe = false; + LongDoubleWidth = 128; + LongDoubleAlign = 128; + LongDoubleFormat = &llvm::APFloat::IEEEquad(); +diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp +index 87d7b30ef5d3..21106c425206 100644 +--- a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp ++++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp +@@ -268,8 +268,14 @@ std::string loongarch::postProcessTargetCPUString(const std::string &CPU, + std::string loongarch::getLoongArchTargetCPU(const llvm::opt::ArgList &Args, + const llvm::Triple &Triple) { + std::string CPU; ++ std::string Arch; + // If we have -march, use that. 
+- if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) +- CPU = A->getValue(); ++ if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) { ++ Arch = A->getValue(); ++ if (Arch == "la64v1.0" || Arch == "la64v1.1") ++ CPU = llvm::LoongArch::getDefaultArch(Triple.isLoongArch64()); ++ else ++ CPU = Arch; ++ } + return postProcessTargetCPUString(CPU, Triple); + } +diff --git a/clang/test/Driver/loongarch-march.c b/clang/test/Driver/loongarch-march.c +index 9214130cd034..d06da72a755c 100644 +--- a/clang/test/Driver/loongarch-march.c ++++ b/clang/test/Driver/loongarch-march.c +@@ -2,10 +2,18 @@ + // RUN: FileCheck %s --check-prefix=CC1-LOONGARCH64 + // RUN: %clang --target=loongarch64 -march=la464 -fsyntax-only %s -### 2>&1 | \ + // RUN: FileCheck %s --check-prefix=CC1-LA464 ++// RUN: %clang --target=loongarch64 -march=la64v1.0 -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-LA64V1P0 ++// RUN: %clang --target=loongarch64 -march=la64v1.1 -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-LA64V1P1 + // RUN: %clang --target=loongarch64 -march=loongarch64 -S -emit-llvm %s -o - | \ + // RUN: FileCheck %s --check-prefix=IR-LOONGARCH64 + // RUN: %clang --target=loongarch64 -march=la464 -S -emit-llvm %s -o - | \ + // RUN: FileCheck %s --check-prefix=IR-LA464 ++// RUN: %clang --target=loongarch64 -march=la64v1.0 -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-LA64V1P0 ++// RUN: %clang --target=loongarch64 -march=la64v1.1 -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-LA64V1P1 + + // CC1-LOONGARCH64: "-target-cpu" "loongarch64" + // CC1-LOONGARCH64-NOT: "-target-feature" +@@ -19,8 +27,22 @@ + // CC1-LA464-NOT: "-target-feature" + // CC1-LA464: "-target-abi" "lp64d" + ++// CC1-LA64V1P0: "-target-cpu" "loongarch64" ++// CC1-LA64V1P0-NOT: "-target-feature" ++// CC1-LA64V1P0: "-target-feature" "+64bit" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+ual" ++// CC1-LA64V1P0-NOT: "-target-feature" ++// CC1-LA64V1P0: "-target-abi" "lp64d" ++ ++// CC1-LA64V1P1: "-target-cpu" "loongarch64" ++// CC1-LA64V1P1-NOT: "-target-feature" ++// CC1-LA64V1P1: "-target-feature" "+64bit" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+ual" "-target-feature" "+frecipe" ++// CC1-LA64V1P1-NOT: "-target-feature" ++// CC1-LA64V1P1: "-target-abi" "lp64d" ++ + // IR-LOONGARCH64: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+f,+ual" + // IR-LA464: attributes #[[#]] ={{.*}}"target-cpu"="la464" {{.*}}"target-features"="+64bit,+d,+f,+lasx,+lsx,+ual" ++// IR-LA64V1P0: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+lsx,+ual" ++// IR-LA64V1P1: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+frecipe,+lsx,+ual" + + int foo(void) { + return 3; +diff --git a/clang/test/Preprocessor/init-loongarch.c b/clang/test/Preprocessor/init-loongarch.c +index 635d029ce9d3..cfa3ddb20f10 100644 +--- a/clang/test/Preprocessor/init-loongarch.c ++++ b/clang/test/Preprocessor/init-loongarch.c +@@ -788,24 +788,43 @@ + // LA64-FPU0-LP64S-NOT: #define __loongarch_single_float + // LA64-FPU0-LP64S: #define __loongarch_soft_float 1 + +-/// Check __loongarch_arch and __loongarch_tune. ++/// Check __loongarch_arch{_tune/_frecipe}. 
+ + // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - | \ +-// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=loongarch64 %s ++// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=loongarch64 %s + // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 | \ + // RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=loongarch64 %s + // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la464 | \ + // RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la464 -DTUNE=la464 %s + // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -mtune=loongarch64 | \ +-// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=loongarch64 %s ++// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=loongarch64 %s + // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -mtune=la464 | \ +-// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=la464 %s ++// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=la464 %s + // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -mtune=la464 | \ + // RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=la464 %s + // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la464 -mtune=loongarch64 | \ + // RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la464 -DTUNE=loongarch64 %s ++// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 | \ ++// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=loongarch64 %s ++// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang -lsx | \ ++// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=loongarch64 %s ++// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +frecipe | \ ++// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la64v1.1 -DTUNE=loongarch64 %s ++// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx | \ ++// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=loongarch64 %s ++// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 | \ ++// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la64v1.1 -DTUNE=loongarch64 %s ++// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -frecipe | \ ++// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=loongarch64 %s ++// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -lsx | \ ++// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=loongarch64 -DTUNE=loongarch64 %s ++// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +frecipe | \ ++// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=loongarch64 -DTUNE=loongarch64 %s ++// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx -Xclang -target-feature -Xclang +frecipe | \ ++// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la64v1.1 
-DTUNE=loongarch64 %s + + // ARCH-TUNE: #define __loongarch_arch "[[ARCH]]" ++// FRECIPE: #define __loongarch_frecipe 1 + // ARCH-TUNE: #define __loongarch_tune "[[TUNE]]" + + // RUN: %clang --target=loongarch64 -mlsx -x c -E -dM %s -o - \ +diff --git a/llvm/lib/TargetParser/LoongArchTargetParser.cpp b/llvm/lib/TargetParser/LoongArchTargetParser.cpp +index 772d24c5ce3d..8e86d18de2ad 100644 +--- a/llvm/lib/TargetParser/LoongArchTargetParser.cpp ++++ b/llvm/lib/TargetParser/LoongArchTargetParser.cpp +@@ -44,6 +44,17 @@ bool LoongArch::getArchFeatures(StringRef Arch, + return true; + } + } ++ ++ if (Arch == "la64v1.0" || Arch == "la64v1.1") { ++ Features.push_back("+64bit"); ++ Features.push_back("+d"); ++ Features.push_back("+lsx"); ++ Features.push_back("+ual"); ++ if (Arch == "la64v1.1") ++ Features.push_back("+frecipe"); ++ return true; ++ } ++ + return false; + } + +-- +2.20.1 + diff --git a/0021-LoongArch-Add-codegen-support-for-insertelement.patch b/0021-LoongArch-Add-codegen-support-for-insertelement.patch new file mode 100644 index 0000000..9614d8c --- /dev/null +++ b/0021-LoongArch-Add-codegen-support-for-insertelement.patch @@ -0,0 +1,700 @@ +From bae7130f659fb0d052523d6e667684f178a9515b Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Thu, 16 Nov 2023 20:05:01 +0800 +Subject: [PATCH 21/42] [LoongArch] Add codegen support for insertelement + +(cherry picked from commit f2cbd1fdf702afe31d0198c9185e08dc2b104252) + +--- + .../LoongArch/LoongArchISelLowering.cpp | 82 +++++- + .../Target/LoongArch/LoongArchISelLowering.h | 1 + + .../LoongArch/LoongArchLASXInstrInfo.td | 18 ++ + .../Target/LoongArch/LoongArchLSXInstrInfo.td | 5 + + .../lasx/ir-instruction/insertelement.ll | 276 ++++++++++++++++++ + .../lsx/ir-instruction/insertelement.ll | 196 +++++++++++++ + 6 files changed, 576 insertions(+), 2 deletions(-) + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index 26e94a53b344..492339ce2151 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -237,7 +237,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + // FIXME: For BUILD_VECTOR, it is temporarily set to `Legal` here, and it + // will be `Custom` handled in the future. + setOperationAction(ISD::BUILD_VECTOR, VT, Legal); +- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); ++ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); + } + for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { +@@ -267,7 +267,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + + // FIXME: Same as above. 
+ setOperationAction(ISD::BUILD_VECTOR, VT, Legal); +- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); ++ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); + } + for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) { +@@ -369,10 +369,20 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, + return lowerRETURNADDR(Op, DAG); + case ISD::WRITE_REGISTER: + return lowerWRITE_REGISTER(Op, DAG); ++ case ISD::INSERT_VECTOR_ELT: ++ return lowerINSERT_VECTOR_ELT(Op, DAG); + } + return SDValue(); + } + ++SDValue ++LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, ++ SelectionDAG &DAG) const { ++ if (isa(Op->getOperand(2))) ++ return Op; ++ return SDValue(); ++} ++ + SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op, + SelectionDAG &DAG) const { + +@@ -3040,6 +3050,71 @@ emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, + return SinkBB; + } + ++static MachineBasicBlock * ++emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, ++ const LoongArchSubtarget &Subtarget) { ++ unsigned InsOp; ++ unsigned HalfSize; ++ switch (MI.getOpcode()) { ++ default: ++ llvm_unreachable("Unexpected opcode"); ++ case LoongArch::PseudoXVINSGR2VR_B: ++ HalfSize = 16; ++ InsOp = LoongArch::VINSGR2VR_B; ++ break; ++ case LoongArch::PseudoXVINSGR2VR_H: ++ HalfSize = 8; ++ InsOp = LoongArch::VINSGR2VR_H; ++ break; ++ } ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ const TargetRegisterClass *RC = &LoongArch::LASX256RegClass; ++ const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass; ++ DebugLoc DL = MI.getDebugLoc(); ++ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); ++ // XDst = vector_insert XSrc, Elt, Idx ++ Register XDst = MI.getOperand(0).getReg(); ++ Register XSrc = MI.getOperand(1).getReg(); ++ Register Elt = MI.getOperand(2).getReg(); ++ unsigned Idx = MI.getOperand(3).getImm(); ++ ++ Register ScratchReg1 = XSrc; ++ if (Idx >= HalfSize) { ++ ScratchReg1 = MRI.createVirtualRegister(RC); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1) ++ .addReg(XSrc) ++ .addReg(XSrc) ++ .addImm(1); ++ } ++ ++ Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC); ++ Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1) ++ .addReg(ScratchReg1, 0, LoongArch::sub_128); ++ BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2) ++ .addReg(ScratchSubReg1) ++ .addReg(Elt) ++ .addImm(Idx >= HalfSize ? 
Idx - HalfSize : Idx); ++ ++ Register ScratchReg2 = XDst; ++ if (Idx >= HalfSize) ++ ScratchReg2 = MRI.createVirtualRegister(RC); ++ ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2) ++ .addImm(0) ++ .addReg(ScratchSubReg2) ++ .addImm(LoongArch::sub_128); ++ ++ if (Idx >= HalfSize) ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst) ++ .addReg(XSrc) ++ .addReg(ScratchReg2) ++ .addImm(2); ++ ++ MI.eraseFromParent(); ++ return BB; ++} ++ + MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( + MachineInstr &MI, MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); +@@ -3095,6 +3170,9 @@ MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( + case LoongArch::PseudoXVBNZ_W: + case LoongArch::PseudoXVBNZ_D: + return emitVecCondBranchPseudo(MI, BB, Subtarget); ++ case LoongArch::PseudoXVINSGR2VR_B: ++ case LoongArch::PseudoXVINSGR2VR_H: ++ return emitPseudoXVINSGR2VR(MI, BB, Subtarget); + } + } + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +index 7765057ebffb..29028ff963d0 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +@@ -275,6 +275,7 @@ private: + SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + + bool isFPImmLegal(const APFloat &Imm, EVT VT, + bool ForCodeSize) const override; +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index 380206ddcf10..475565db15c9 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -1065,6 +1065,13 @@ def PseudoXVBZ_W : VecCond; + def PseudoXVBZ_D : VecCond; + def PseudoXVBZ : VecCond; + ++let usesCustomInserter = 1, Constraints = "$xd = $dst" in { ++def PseudoXVINSGR2VR_B ++ : Pseudo<(outs LASX256:$dst), (ins LASX256:$xd, GPR:$rj, uimm5:$imm)>; ++def PseudoXVINSGR2VR_H ++ : Pseudo<(outs LASX256:$dst), (ins LASX256:$xd, GPR:$rj, uimm4:$imm)>; ++} // usesCustomInserter = 1, Constraints = "$xd = $dst" ++ + } // Predicates = [HasExtLASX] + + multiclass PatXr { +@@ -1365,12 +1372,23 @@ def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa), + def : Pat<(fma v4f64:$xj, v4f64:$xk, v4f64:$xa), + (XVFMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; + ++// PseudoXVINSGR2VR_{B/H} ++def : Pat<(vector_insert v32i8:$xd, GRLenVT:$rj, uimm5:$imm), ++ (PseudoXVINSGR2VR_B v32i8:$xd, GRLenVT:$rj, uimm5:$imm)>; ++def : Pat<(vector_insert v16i16:$xd, GRLenVT:$rj, uimm4:$imm), ++ (PseudoXVINSGR2VR_H v16i16:$xd, GRLenVT:$rj, uimm4:$imm)>; ++ + // XVINSGR2VR_{W/D} + def : Pat<(vector_insert v8i32:$xd, GRLenVT:$rj, uimm3:$imm), + (XVINSGR2VR_W v8i32:$xd, GRLenVT:$rj, uimm3:$imm)>; + def : Pat<(vector_insert v4i64:$xd, GRLenVT:$rj, uimm2:$imm), + (XVINSGR2VR_D v4i64:$xd, GRLenVT:$rj, uimm2:$imm)>; + ++def : Pat<(vector_insert v8f32:$vd, FPR32:$fj, uimm3:$imm), ++ (XVINSGR2VR_W $vd, (COPY_TO_REGCLASS FPR32:$fj, GPR), uimm3:$imm)>; ++def : Pat<(vector_insert v4f64:$vd, FPR64:$fj, uimm2:$imm), ++ (XVINSGR2VR_D $vd, (COPY_TO_REGCLASS FPR64:$fj, GPR), uimm2:$imm)>; ++ + // XVPICKVE2GR_W[U] + def : Pat<(loongarch_vpick_sext_elt v8i32:$xd, uimm3:$imm, i32), + (XVPICKVE2GR_W v8i32:$xd, uimm3:$imm)>; 
+diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index 980870e34503..d8fd132a1c59 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -1462,6 +1462,11 @@ def : Pat<(vector_insert v4i32:$vd, GRLenVT:$rj, uimm2:$imm), + def : Pat<(vector_insert v2i64:$vd, GRLenVT:$rj, uimm1:$imm), + (VINSGR2VR_D v2i64:$vd, GRLenVT:$rj, uimm1:$imm)>; + ++def : Pat<(vector_insert v4f32:$vd, FPR32:$fj, uimm2:$imm), ++ (VINSGR2VR_W $vd, (COPY_TO_REGCLASS FPR32:$fj, GPR), uimm2:$imm)>; ++def : Pat<(vector_insert v2f64:$vd, FPR64:$fj, uimm1:$imm), ++ (VINSGR2VR_D $vd, (COPY_TO_REGCLASS FPR64:$fj, GPR), uimm1:$imm)>; ++ + // VPICKVE2GR_{B/H/W}[U] + def : Pat<(loongarch_vpick_sext_elt v16i8:$vd, uimm4:$imm, i8), + (VPICKVE2GR_B v16i8:$vd, uimm4:$imm)>; +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll +new file mode 100644 +index 000000000000..e571a5d2e4cf +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll +@@ -0,0 +1,276 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @insert_32xi8(ptr %src, ptr %dst, i8 %in) nounwind { ++; CHECK-LABEL: insert_32xi8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <32 x i8>, ptr %src ++ %v_new = insertelement <32 x i8> %v, i8 %in, i32 1 ++ store <32 x i8> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_32xi8_upper(ptr %src, ptr %dst, i8 %in) nounwind { ++; CHECK-LABEL: insert_32xi8_upper: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr1, $a2, 0 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <32 x i8>, ptr %src ++ %v_new = insertelement <32 x i8> %v, i8 %in, i32 16 ++ store <32 x i8> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_16xi16(ptr %src, ptr %dst, i16 %in) nounwind { ++; CHECK-LABEL: insert_16xi16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <16 x i16>, ptr %src ++ %v_new = insertelement <16 x i16> %v, i16 %in, i32 1 ++ store <16 x i16> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_16xi16_upper(ptr %src, ptr %dst, i16 %in) nounwind { ++; CHECK-LABEL: insert_16xi16_upper: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.h $vr1, $a2, 0 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <16 x i16>, ptr %src ++ %v_new = insertelement <16 x i16> %v, i16 %in, i32 8 ++ store <16 x i16> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_8xi32(ptr %src, ptr %dst, i32 %in) nounwind { ++; CHECK-LABEL: insert_8xi32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a2, 1 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <8 x i32>, ptr %src ++ %v_new = insertelement <8 x 
i32> %v, i32 %in, i32 1 ++ store <8 x i32> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_4xi64(ptr %src, ptr %dst, i64 %in) nounwind { ++; CHECK-LABEL: insert_4xi64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvinsgr2vr.d $xr0, $a2, 1 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <4 x i64>, ptr %src ++ %v_new = insertelement <4 x i64> %v, i64 %in, i32 1 ++ store <4 x i64> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_8xfloat(ptr %src, ptr %dst, float %in) nounwind { ++; CHECK-LABEL: insert_8xfloat: ++; CHECK: # %bb.0: ++; CHECK-NEXT: movfr2gr.s $a2, $fa0 ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a2, 1 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <8 x float>, ptr %src ++ %v_new = insertelement <8 x float> %v, float %in, i32 1 ++ store <8 x float> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_4xdouble(ptr %src, ptr %dst, double %in) nounwind { ++; CHECK-LABEL: insert_4xdouble: ++; CHECK: # %bb.0: ++; CHECK-NEXT: movfr2gr.d $a2, $fa0 ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvinsgr2vr.d $xr0, $a2, 1 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <4 x double>, ptr %src ++ %v_new = insertelement <4 x double> %v, double %in, i32 1 ++ store <4 x double> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_32xi8_idx(ptr %src, ptr %dst, i8 %in, i32 %idx) nounwind { ++; CHECK-LABEL: insert_32xi8_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -64 ++; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: addi.d $fp, $sp, 64 ++; CHECK-NEXT: srli.d $a4, $sp, 5 ++; CHECK-NEXT: slli.d $sp, $a4, 5 ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvst $xr0, $sp, 0 ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a3, 4, 0 ++; CHECK-NEXT: st.b $a2, $a0, 0 ++; CHECK-NEXT: xvld $xr0, $sp, 0 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $fp, -64 ++; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 64 ++; CHECK-NEXT: ret ++ %v = load volatile <32 x i8>, ptr %src ++ %v_new = insertelement <32 x i8> %v, i8 %in, i32 %idx ++ store <32 x i8> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_16xi16_idx(ptr %src, ptr %dst, i16 %in, i32 %idx) nounwind { ++; CHECK-LABEL: insert_16xi16_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -64 ++; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: addi.d $fp, $sp, 64 ++; CHECK-NEXT: srli.d $a4, $sp, 5 ++; CHECK-NEXT: slli.d $sp, $a4, 5 ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvst $xr0, $sp, 0 ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a3, 4, 1 ++; CHECK-NEXT: st.h $a2, $a0, 0 ++; CHECK-NEXT: xvld $xr0, $sp, 0 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $fp, -64 ++; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 64 ++; CHECK-NEXT: ret ++ %v = load volatile <16 x i16>, ptr %src ++ %v_new = insertelement <16 x i16> %v, i16 %in, i32 %idx ++ store <16 x i16> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_8xi32_idx(ptr %src, ptr %dst, i32 %in, i32 %idx) nounwind { ++; CHECK-LABEL: insert_8xi32_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d 
$sp, $sp, -64 ++; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: addi.d $fp, $sp, 64 ++; CHECK-NEXT: srli.d $a4, $sp, 5 ++; CHECK-NEXT: slli.d $sp, $a4, 5 ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvst $xr0, $sp, 0 ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a3, 4, 2 ++; CHECK-NEXT: st.w $a2, $a0, 0 ++; CHECK-NEXT: xvld $xr0, $sp, 0 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $fp, -64 ++; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 64 ++; CHECK-NEXT: ret ++ %v = load volatile <8 x i32>, ptr %src ++ %v_new = insertelement <8 x i32> %v, i32 %in, i32 %idx ++ store <8 x i32> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_4xi64_idx(ptr %src, ptr %dst, i64 %in, i32 %idx) nounwind { ++; CHECK-LABEL: insert_4xi64_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -64 ++; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: addi.d $fp, $sp, 64 ++; CHECK-NEXT: srli.d $a4, $sp, 5 ++; CHECK-NEXT: slli.d $sp, $a4, 5 ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvst $xr0, $sp, 0 ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a3, 4, 3 ++; CHECK-NEXT: st.d $a2, $a0, 0 ++; CHECK-NEXT: xvld $xr0, $sp, 0 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $fp, -64 ++; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 64 ++; CHECK-NEXT: ret ++ %v = load volatile <4 x i64>, ptr %src ++ %v_new = insertelement <4 x i64> %v, i64 %in, i32 %idx ++ store <4 x i64> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_8xfloat_idx(ptr %src, ptr %dst, float %in, i32 %idx) nounwind { ++; CHECK-LABEL: insert_8xfloat_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -64 ++; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: addi.d $fp, $sp, 64 ++; CHECK-NEXT: srli.d $a3, $sp, 5 ++; CHECK-NEXT: slli.d $sp, $a3, 5 ++; CHECK-NEXT: xvld $xr1, $a0, 0 ++; CHECK-NEXT: xvst $xr1, $sp, 0 ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2 ++; CHECK-NEXT: fst.s $fa0, $a0, 0 ++; CHECK-NEXT: xvld $xr0, $sp, 0 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $fp, -64 ++; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 64 ++; CHECK-NEXT: ret ++ %v = load volatile <8 x float>, ptr %src ++ %v_new = insertelement <8 x float> %v, float %in, i32 %idx ++ store <8 x float> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_4xdouble_idx(ptr %src, ptr %dst, double %in, i32 %idx) nounwind { ++; CHECK-LABEL: insert_4xdouble_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -64 ++; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: addi.d $fp, $sp, 64 ++; CHECK-NEXT: srli.d $a3, $sp, 5 ++; CHECK-NEXT: slli.d $sp, $a3, 5 ++; CHECK-NEXT: xvld $xr1, $a0, 0 ++; CHECK-NEXT: xvst $xr1, $sp, 0 ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3 ++; CHECK-NEXT: fst.d $fa0, $a0, 0 ++; CHECK-NEXT: xvld $xr0, $sp, 0 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $fp, -64 ++; CHECK-NEXT: 
ld.d $fp, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 64 ++; CHECK-NEXT: ret ++ %v = load volatile <4 x double>, ptr %src ++ %v_new = insertelement <4 x double> %v, double %in, i32 %idx ++ store <4 x double> %v_new, ptr %dst ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll +new file mode 100644 +index 000000000000..a9834591aa0e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll +@@ -0,0 +1,196 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @insert_16xi8(ptr %src, ptr %dst, i8 %ins) nounwind { ++; CHECK-LABEL: insert_16xi8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <16 x i8>, ptr %src ++ %v_new = insertelement <16 x i8> %v, i8 %ins, i32 1 ++ store <16 x i8> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_8xi16(ptr %src, ptr %dst, i16 %ins) nounwind { ++; CHECK-LABEL: insert_8xi16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <8 x i16>, ptr %src ++ %v_new = insertelement <8 x i16> %v, i16 %ins, i32 1 ++ store <8 x i16> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_4xi32(ptr %src, ptr %dst, i32 %ins) nounwind { ++; CHECK-LABEL: insert_4xi32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vinsgr2vr.w $vr0, $a2, 1 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <4 x i32>, ptr %src ++ %v_new = insertelement <4 x i32> %v, i32 %ins, i32 1 ++ store <4 x i32> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_2xi64(ptr %src, ptr %dst, i64 %ins) nounwind { ++; CHECK-LABEL: insert_2xi64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vinsgr2vr.d $vr0, $a2, 1 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <2 x i64>, ptr %src ++ %v_new = insertelement <2 x i64> %v, i64 %ins, i32 1 ++ store <2 x i64> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_4xfloat(ptr %src, ptr %dst, float %ins) nounwind { ++; CHECK-LABEL: insert_4xfloat: ++; CHECK: # %bb.0: ++; CHECK-NEXT: movfr2gr.s $a2, $fa0 ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vinsgr2vr.w $vr0, $a2, 1 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <4 x float>, ptr %src ++ %v_new = insertelement <4 x float> %v, float %ins, i32 1 ++ store <4 x float> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_2xdouble(ptr %src, ptr %dst, double %ins) nounwind { ++; CHECK-LABEL: insert_2xdouble: ++; CHECK: # %bb.0: ++; CHECK-NEXT: movfr2gr.d $a2, $fa0 ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vinsgr2vr.d $vr0, $a2, 1 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <2 x double>, ptr %src ++ %v_new = insertelement <2 x double> %v, double %ins, i32 1 ++ store <2 x double> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_16xi8_idx(ptr %src, ptr %dst, i8 %ins, i32 %idx) nounwind { ++; CHECK-LABEL: insert_16xi8_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vst $vr0, $sp, 0 ++; 
CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a3, 3, 0 ++; CHECK-NEXT: st.b $a2, $a0, 0 ++; CHECK-NEXT: vld $vr0, $sp, 0 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $sp, 16 ++; CHECK-NEXT: ret ++ %v = load volatile <16 x i8>, ptr %src ++ %v_new = insertelement <16 x i8> %v, i8 %ins, i32 %idx ++ store <16 x i8> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_8xi16_idx(ptr %src, ptr %dst, i16 %ins, i32 %idx) nounwind { ++; CHECK-LABEL: insert_8xi16_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vst $vr0, $sp, 0 ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a3, 3, 1 ++; CHECK-NEXT: st.h $a2, $a0, 0 ++; CHECK-NEXT: vld $vr0, $sp, 0 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $sp, 16 ++; CHECK-NEXT: ret ++ %v = load volatile <8 x i16>, ptr %src ++ %v_new = insertelement <8 x i16> %v, i16 %ins, i32 %idx ++ store <8 x i16> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_4xi32_idx(ptr %src, ptr %dst, i32 %ins, i32 %idx) nounwind { ++; CHECK-LABEL: insert_4xi32_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vst $vr0, $sp, 0 ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a3, 3, 2 ++; CHECK-NEXT: st.w $a2, $a0, 0 ++; CHECK-NEXT: vld $vr0, $sp, 0 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $sp, 16 ++; CHECK-NEXT: ret ++ %v = load volatile <4 x i32>, ptr %src ++ %v_new = insertelement <4 x i32> %v, i32 %ins, i32 %idx ++ store <4 x i32> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_2xi64_idx(ptr %src, ptr %dst, i64 %ins, i32 %idx) nounwind { ++; CHECK-LABEL: insert_2xi64_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vst $vr0, $sp, 0 ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a3, 3, 3 ++; CHECK-NEXT: st.d $a2, $a0, 0 ++; CHECK-NEXT: vld $vr0, $sp, 0 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $sp, 16 ++; CHECK-NEXT: ret ++ %v = load volatile <2 x i64>, ptr %src ++ %v_new = insertelement <2 x i64> %v, i64 %ins, i32 %idx ++ store <2 x i64> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_4xfloat_idx(ptr %src, ptr %dst, float %ins, i32 %idx) nounwind { ++; CHECK-LABEL: insert_4xfloat_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: vld $vr1, $a0, 0 ++; CHECK-NEXT: vst $vr1, $sp, 0 ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a2, 3, 2 ++; CHECK-NEXT: fst.s $fa0, $a0, 0 ++; CHECK-NEXT: vld $vr0, $sp, 0 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $sp, 16 ++; CHECK-NEXT: ret ++ %v = load volatile <4 x float>, ptr %src ++ %v_new = insertelement <4 x float> %v, float %ins, i32 %idx ++ store <4 x float> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_2xdouble_idx(ptr %src, ptr %dst, double %ins, i32 %idx) nounwind { ++; CHECK-LABEL: insert_2xdouble_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: vld $vr1, $a0, 0 ++; CHECK-NEXT: vst $vr1, $sp, 0 ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a2, 3, 3 ++; CHECK-NEXT: fst.d $fa0, $a0, 0 ++; CHECK-NEXT: vld $vr0, $sp, 0 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $sp, 16 ++; CHECK-NEXT: ret ++ %v = load volatile <2 x double>, ptr %src ++ %v_new = insertelement <2 x double> %v, double %ins, i32 %idx ++ store <2 x double> %v_new, ptr %dst ++ ret void 
++} +-- +2.20.1 + diff --git a/0021-LoongArch-Precommit-a-test-for-smul-with-overflow-NF.patch b/0021-LoongArch-Precommit-a-test-for-smul-with-overflow-NF.patch new file mode 100644 index 0000000..23ae3ef --- /dev/null +++ b/0021-LoongArch-Precommit-a-test-for-smul-with-overflow-NF.patch @@ -0,0 +1,139 @@ +From 91fcc287c4a1a267f20c459177cf6203a8c6c3ed Mon Sep 17 00:00:00 2001 +From: hev +Date: Thu, 23 Nov 2023 15:15:26 +0800 +Subject: [PATCH 21/27] [LoongArch] Precommit a test for smul with overflow + (NFC) (#73212) + +(cherry picked from commit 7414c0db962f8a5029fd44c3e0bc93d9ce20be71) +--- + .../CodeGen/LoongArch/smul-with-overflow.ll | 118 ++++++++++++++++++ + 1 file changed, 118 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/smul-with-overflow.ll + +diff --git a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll +new file mode 100644 +index 000000000000..a53e77e5aa4b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll +@@ -0,0 +1,118 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 ++; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 ++ ++define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) { ++; LA32-LABEL: smuloi64: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: .cfi_def_cfa_offset 16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill ++; LA32-NEXT: .cfi_offset 1, -4 ++; LA32-NEXT: .cfi_offset 22, -8 ++; LA32-NEXT: move $fp, $a4 ++; LA32-NEXT: st.w $zero, $sp, 4 ++; LA32-NEXT: addi.w $a4, $sp, 4 ++; LA32-NEXT: bl %plt(__mulodi4) ++; LA32-NEXT: st.w $a1, $fp, 4 ++; LA32-NEXT: st.w $a0, $fp, 0 ++; LA32-NEXT: ld.w $a0, $sp, 4 ++; LA32-NEXT: sltu $a0, $zero, $a0 ++; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: smuloi64: ++; LA64: # %bb.0: ++; LA64-NEXT: mul.d $a3, $a0, $a1 ++; LA64-NEXT: st.d $a3, $a2, 0 ++; LA64-NEXT: mulh.d $a0, $a0, $a1 ++; LA64-NEXT: srai.d $a1, $a3, 63 ++; LA64-NEXT: xor $a0, $a0, $a1 ++; LA64-NEXT: sltu $a0, $zero, $a0 ++; LA64-NEXT: ret ++ %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) ++ %val = extractvalue {i64, i1} %t, 0 ++ %obit = extractvalue {i64, i1} %t, 1 ++ store i64 %val, ptr %res ++ ret i1 %obit ++} ++ ++define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) { ++; LA32-LABEL: smuloi128: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -64 ++; LA32-NEXT: .cfi_def_cfa_offset 64 ++; LA32-NEXT: st.w $ra, $sp, 60 # 4-byte Folded Spill ++; LA32-NEXT: st.w $fp, $sp, 56 # 4-byte Folded Spill ++; LA32-NEXT: .cfi_offset 1, -4 ++; LA32-NEXT: .cfi_offset 22, -8 ++; LA32-NEXT: move $fp, $a2 ++; LA32-NEXT: st.w $zero, $sp, 52 ++; LA32-NEXT: ld.w $a2, $a1, 12 ++; LA32-NEXT: st.w $a2, $sp, 12 ++; LA32-NEXT: ld.w $a2, $a1, 8 ++; LA32-NEXT: st.w $a2, $sp, 8 ++; LA32-NEXT: ld.w $a2, $a1, 4 ++; LA32-NEXT: st.w $a2, $sp, 4 ++; LA32-NEXT: ld.w $a1, $a1, 0 ++; LA32-NEXT: st.w $a1, $sp, 0 ++; LA32-NEXT: ld.w $a1, $a0, 12 ++; LA32-NEXT: st.w $a1, $sp, 28 ++; LA32-NEXT: ld.w $a1, $a0, 8 ++; LA32-NEXT: st.w $a1, $sp, 24 ++; LA32-NEXT: ld.w $a1, $a0, 4 ++; LA32-NEXT: st.w $a1, $sp, 20 ++; LA32-NEXT: ld.w $a0, $a0, 0 ++; LA32-NEXT: st.w $a0, $sp, 16 ++; LA32-NEXT: addi.w $a0, $sp, 32 ++; LA32-NEXT: addi.w $a1, $sp, 16 ++; 
LA32-NEXT: addi.w $a2, $sp, 0 ++; LA32-NEXT: addi.w $a3, $sp, 52 ++; LA32-NEXT: bl %plt(__muloti4) ++; LA32-NEXT: ld.w $a0, $sp, 44 ++; LA32-NEXT: st.w $a0, $fp, 12 ++; LA32-NEXT: ld.w $a0, $sp, 40 ++; LA32-NEXT: st.w $a0, $fp, 8 ++; LA32-NEXT: ld.w $a0, $sp, 36 ++; LA32-NEXT: st.w $a0, $fp, 4 ++; LA32-NEXT: ld.w $a0, $sp, 32 ++; LA32-NEXT: st.w $a0, $fp, 0 ++; LA32-NEXT: ld.w $a0, $sp, 52 ++; LA32-NEXT: sltu $a0, $zero, $a0 ++; LA32-NEXT: ld.w $fp, $sp, 56 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $ra, $sp, 60 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 64 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: smuloi128: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.d $sp, $sp, -32 ++; LA64-NEXT: .cfi_def_cfa_offset 32 ++; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill ++; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill ++; LA64-NEXT: .cfi_offset 1, -8 ++; LA64-NEXT: .cfi_offset 22, -16 ++; LA64-NEXT: move $fp, $a4 ++; LA64-NEXT: st.d $zero, $sp, 8 ++; LA64-NEXT: addi.d $a4, $sp, 8 ++; LA64-NEXT: bl %plt(__muloti4) ++; LA64-NEXT: st.d $a1, $fp, 8 ++; LA64-NEXT: st.d $a0, $fp, 0 ++; LA64-NEXT: ld.d $a0, $sp, 8 ++; LA64-NEXT: sltu $a0, $zero, $a0 ++; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload ++; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload ++; LA64-NEXT: addi.d $sp, $sp, 32 ++; LA64-NEXT: ret ++ %t = call {i128, i1} @llvm.smul.with.overflow.i128(i128 %v1, i128 %v2) ++ %val = extractvalue {i128, i1} %t, 0 ++ %obit = extractvalue {i128, i1} %t, 1 ++ store i128 %val, ptr %res ++ ret i1 %obit ++} ++ ++declare {i64, i1} @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone ++declare {i128, i1} @llvm.smul.with.overflow.i128(i128, i128) nounwind readnone +-- +2.20.1 + diff --git a/0021-LoongArch-Support-la664-100068.patch b/0021-LoongArch-Support-la664-100068.patch new file mode 100644 index 0000000..d91187b --- /dev/null +++ b/0021-LoongArch-Support-la664-100068.patch @@ -0,0 +1,182 @@ +From 5778c54f740ad547aba03f9f0db4f7641faedcc9 Mon Sep 17 00:00:00 2001 +From: Ami-zhang +Date: Tue, 23 Jul 2024 15:14:20 +0800 +Subject: [PATCH 21/23] [LoongArch] Support la664 (#100068) + +A new ProcessorModel called `la664` is defined in LoongArch.td to +support `-march/-mtune=la664`. 
+ +(cherry picked from commit fcec298087dba0c83f6d0bbafd6cd934c42cbf82) +--- + clang/test/Driver/loongarch-march.c | 11 +++++++++++ + clang/test/Driver/loongarch-mtune.c | 5 +++++ + clang/test/Preprocessor/init-loongarch.c | 8 ++++++++ + .../llvm/TargetParser/LoongArchTargetParser.def | 2 ++ + .../include/llvm/TargetParser/LoongArchTargetParser.h | 3 +++ + llvm/lib/Target/LoongArch/LoongArch.td | 7 +++++++ + llvm/lib/TargetParser/Host.cpp | 2 ++ + llvm/test/CodeGen/LoongArch/cpus.ll | 5 +++++ + 8 files changed, 43 insertions(+) + +diff --git a/clang/test/Driver/loongarch-march.c b/clang/test/Driver/loongarch-march.c +index d06da72a755c..2d5b315d962a 100644 +--- a/clang/test/Driver/loongarch-march.c ++++ b/clang/test/Driver/loongarch-march.c +@@ -6,6 +6,8 @@ + // RUN: FileCheck %s --check-prefix=CC1-LA64V1P0 + // RUN: %clang --target=loongarch64 -march=la64v1.1 -fsyntax-only %s -### 2>&1 | \ + // RUN: FileCheck %s --check-prefix=CC1-LA64V1P1 ++// RUN: %clang --target=loongarch64 -march=la664 -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-LA664 + // RUN: %clang --target=loongarch64 -march=loongarch64 -S -emit-llvm %s -o - | \ + // RUN: FileCheck %s --check-prefix=IR-LOONGARCH64 + // RUN: %clang --target=loongarch64 -march=la464 -S -emit-llvm %s -o - | \ +@@ -14,6 +16,8 @@ + // RUN: FileCheck %s --check-prefix=IR-LA64V1P0 + // RUN: %clang --target=loongarch64 -march=la64v1.1 -S -emit-llvm %s -o - | \ + // RUN: FileCheck %s --check-prefix=IR-LA64V1P1 ++// RUN: %clang --target=loongarch64 -march=la664 -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-LA664 + + // CC1-LOONGARCH64: "-target-cpu" "loongarch64" + // CC1-LOONGARCH64-NOT: "-target-feature" +@@ -39,10 +43,17 @@ + // CC1-LA64V1P1-NOT: "-target-feature" + // CC1-LA64V1P1: "-target-abi" "lp64d" + ++// CC1-LA664: "-target-cpu" "la664" ++// CC1-LA664-NOT: "-target-feature" ++// CC1-LA664: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+lasx" "-target-feature" "+ual" "-target-feature" "+frecipe" ++// CC1-LA664-NOT: "-target-feature" ++// CC1-LA664: "-target-abi" "lp64d" ++ + // IR-LOONGARCH64: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+f,+ual" + // IR-LA464: attributes #[[#]] ={{.*}}"target-cpu"="la464" {{.*}}"target-features"="+64bit,+d,+f,+lasx,+lsx,+ual" + // IR-LA64V1P0: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+lsx,+ual" + // IR-LA64V1P1: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+frecipe,+lsx,+ual" ++// IR-LA664: attributes #[[#]] ={{.*}}"target-cpu"="la664" {{.*}}"target-features"="+64bit,+d,+f,+frecipe,+lasx,+lsx,+ual" + + int foo(void) { + return 3; +diff --git a/clang/test/Driver/loongarch-mtune.c b/clang/test/Driver/loongarch-mtune.c +index 6f3f39e9bbd8..face12e1a1a8 100644 +--- a/clang/test/Driver/loongarch-mtune.c ++++ b/clang/test/Driver/loongarch-mtune.c +@@ -8,6 +8,11 @@ + // RUN: %clang --target=loongarch64 -mtune=la464 -S -emit-llvm %s -o - | \ + // RUN: FileCheck %s --check-prefix=IRATTR -DCPU=la464 + ++// RUN: %clang --target=loongarch64 -mtune=la664 -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1ARG -DCPU=la664 ++// RUN: %clang --target=loongarch64 -mtune=la664 -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IRATTR -DCPU=la664 ++ + // RUN: %clang --target=loongarch64 -mtune=invalidcpu -fsyntax-only %s -### 2>&1 | \ + // RUN: FileCheck 
%s --check-prefix=CC1ARG -DCPU=invalidcpu + // RUN: not %clang --target=loongarch64 -mtune=invalidcpu -S -emit-llvm %s -o /dev/null 2>&1 | \ +diff --git a/clang/test/Preprocessor/init-loongarch.c b/clang/test/Preprocessor/init-loongarch.c +index cfa3ddb20f10..7ce3d2de8c78 100644 +--- a/clang/test/Preprocessor/init-loongarch.c ++++ b/clang/test/Preprocessor/init-loongarch.c +@@ -822,6 +822,14 @@ + // RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=loongarch64 -DTUNE=loongarch64 %s + // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx -Xclang -target-feature -Xclang +frecipe | \ + // RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la64v1.1 -DTUNE=loongarch64 %s ++// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la664 | \ ++// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la664 -DTUNE=la664 %s ++// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -mtune=la664 | \ ++// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=la664 %s ++// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -mtune=la664 | \ ++// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=la664 %s ++// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la664 -mtune=loongarch64 | \ ++// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la664 -DTUNE=loongarch64 %s + + // ARCH-TUNE: #define __loongarch_arch "[[ARCH]]" + // FRECIPE: #define __loongarch_frecipe 1 +diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.def b/llvm/include/llvm/TargetParser/LoongArchTargetParser.def +index b20d124953f8..101a48cbd539 100644 +--- a/llvm/include/llvm/TargetParser/LoongArchTargetParser.def ++++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.def +@@ -10,6 +10,7 @@ LOONGARCH_FEATURE("+lasx", FK_LASX) + LOONGARCH_FEATURE("+lbt", FK_LBT) + LOONGARCH_FEATURE("+lvz", FK_LVZ) + LOONGARCH_FEATURE("+ual", FK_UAL) ++LOONGARCH_FEATURE("+frecipe", FK_FRECIPE) + + #undef LOONGARCH_FEATURE + +@@ -19,5 +20,6 @@ LOONGARCH_FEATURE("+ual", FK_UAL) + + LOONGARCH_ARCH("loongarch64", AK_LOONGARCH64, FK_64BIT | FK_FP32 | FK_FP64 | FK_UAL) + LOONGARCH_ARCH("la464", AK_LA464, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL) ++LOONGARCH_ARCH("la664", AK_LA664, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL | FK_FRECIPE) + + #undef LOONGARCH_ARCH +diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h +index 028844187584..c0bb15a5163b 100644 +--- a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h ++++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h +@@ -46,6 +46,9 @@ enum FeatureKind : uint32_t { + + // Allow memory accesses to be unaligned. + FK_UAL = 1 << 8, ++ ++ // Floating-point approximate reciprocal instructions are available. 
++ FK_FRECIPE = 1 << 9, + }; + + struct FeatureInfo { +diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td +index 5573e5415d26..b5cd5bb0f8a4 100644 +--- a/llvm/lib/Target/LoongArch/LoongArch.td ++++ b/llvm/lib/Target/LoongArch/LoongArch.td +@@ -147,6 +147,13 @@ def : ProcessorModel<"la464", NoSchedModel, [Feature64Bit, + FeatureExtLVZ, + FeatureExtLBT]>; + ++def : ProcessorModel<"la664", NoSchedModel, [Feature64Bit, ++ FeatureUAL, ++ FeatureExtLASX, ++ FeatureExtLVZ, ++ FeatureExtLBT, ++ FeatureFrecipe]>; ++ + //===----------------------------------------------------------------------===// + // Define the LoongArch target. + //===----------------------------------------------------------------------===// +diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp +index d11dc605e188..c2de2293d1d0 100644 +--- a/llvm/lib/TargetParser/Host.cpp ++++ b/llvm/lib/TargetParser/Host.cpp +@@ -1466,6 +1466,8 @@ StringRef sys::getHostCPUName() { + switch (processor_id & 0xf000) { + case 0xc000: // Loongson 64bit, 4-issue + return "la464"; ++ case 0xd000: // Loongson 64bit, 6-issue ++ return "la664"; + // TODO: Others. + default: + break; +diff --git a/llvm/test/CodeGen/LoongArch/cpus.ll b/llvm/test/CodeGen/LoongArch/cpus.ll +index 35945ae4de71..087cf887b813 100644 +--- a/llvm/test/CodeGen/LoongArch/cpus.ll ++++ b/llvm/test/CodeGen/LoongArch/cpus.ll +@@ -3,6 +3,7 @@ + + ; RUN: llc < %s --mtriple=loongarch64 --mcpu=loongarch64 2>&1 | FileCheck %s + ; RUN: llc < %s --mtriple=loongarch64 --mcpu=la464 2>&1 | FileCheck %s ++; RUN: llc < %s --mtriple=loongarch64 --mcpu=la664 2>&1 | FileCheck %s + ; RUN: llc < %s --mtriple=loongarch64 2>&1 | FileCheck %s + + ; CHECK-NOT: {{.*}} is not a recognized processor for this target +@@ -18,3 +19,7 @@ define void @tune_cpu_loongarch64() "tune-cpu"="loongarch64" { + define void @tune_cpu_la464() "tune-cpu"="la464" { + ret void + } ++ ++define void @tune_cpu_la664() "tune-cpu"="la664" { ++ ret void ++} +-- +2.20.1 + diff --git a/0022-LoongArch-Custom-lowering-ISD-BUILD_VECTOR.patch b/0022-LoongArch-Custom-lowering-ISD-BUILD_VECTOR.patch new file mode 100644 index 0000000..8bca0d8 --- /dev/null +++ b/0022-LoongArch-Custom-lowering-ISD-BUILD_VECTOR.patch @@ -0,0 +1,1301 @@ +From ee5ceea855a05e3aceda6fa092e94741ddc4bb53 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Wed, 25 Oct 2023 17:00:32 +0800 +Subject: [PATCH 22/42] [LoongArch] Custom lowering `ISD::BUILD_VECTOR` + +(cherry picked from commit add224c0a094d20389d3659f7b6e496df461a976) + +--- + .../LoongArch/LoongArchISelDAGToDAG.cpp | 52 +- + .../LoongArch/LoongArchISelLowering.cpp | 102 +++- + .../Target/LoongArch/LoongArchISelLowering.h | 1 + + .../LoongArch/LoongArchLASXInstrInfo.td | 13 + + .../Target/LoongArch/LoongArchLSXInstrInfo.td | 12 +- + .../CodeGen/LoongArch/lasx/build-vector.ll | 551 ++++++++++++++++++ + .../CodeGen/LoongArch/lsx/build-vector.ll | 376 ++++++++++++ + .../LoongArch/lsx/ir-instruction/mul.ll | 28 +- + 8 files changed, 1112 insertions(+), 23 deletions(-) + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/build-vector.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/build-vector.ll + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp +index f55184019988..01b2f720f902 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp +@@ -77,13 +77,63 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) 
{ + return; + } + case ISD::BITCAST: { +- if (VT.is128BitVector() || VT.is512BitVector()) { ++ if (VT.is128BitVector() || VT.is256BitVector()) { + ReplaceUses(SDValue(Node, 0), Node->getOperand(0)); + CurDAG->RemoveDeadNode(Node); + return; + } + break; + } ++ case ISD::BUILD_VECTOR: { ++ // Select appropriate [x]vrepli.[bhwd] instructions for constant splats of ++ // 128/256-bit when LSX/LASX is enabled. ++ BuildVectorSDNode *BVN = cast(Node); ++ APInt SplatValue, SplatUndef; ++ unsigned SplatBitSize; ++ bool HasAnyUndefs; ++ unsigned Op; ++ EVT ViaVecTy; ++ bool Is128Vec = BVN->getValueType(0).is128BitVector(); ++ bool Is256Vec = BVN->getValueType(0).is256BitVector(); ++ ++ if (!Subtarget->hasExtLSX() || (!Is128Vec && !Is256Vec)) ++ break; ++ if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, ++ HasAnyUndefs, 8)) ++ break; ++ ++ switch (SplatBitSize) { ++ default: ++ break; ++ case 8: ++ Op = Is256Vec ? LoongArch::PseudoXVREPLI_B : LoongArch::PseudoVREPLI_B; ++ ViaVecTy = Is256Vec ? MVT::v32i8 : MVT::v16i8; ++ break; ++ case 16: ++ Op = Is256Vec ? LoongArch::PseudoXVREPLI_H : LoongArch::PseudoVREPLI_H; ++ ViaVecTy = Is256Vec ? MVT::v16i16 : MVT::v8i16; ++ break; ++ case 32: ++ Op = Is256Vec ? LoongArch::PseudoXVREPLI_W : LoongArch::PseudoVREPLI_W; ++ ViaVecTy = Is256Vec ? MVT::v8i32 : MVT::v4i32; ++ break; ++ case 64: ++ Op = Is256Vec ? LoongArch::PseudoXVREPLI_D : LoongArch::PseudoVREPLI_D; ++ ViaVecTy = Is256Vec ? MVT::v4i64 : MVT::v2i64; ++ break; ++ } ++ ++ SDNode *Res; ++ // If we have a signed 10 bit integer, we can splat it directly. ++ if (SplatValue.isSignedIntN(10)) { ++ SDValue Imm = CurDAG->getTargetConstant(SplatValue, DL, ++ ViaVecTy.getVectorElementType()); ++ Res = CurDAG->getMachineNode(Op, DL, ViaVecTy, Imm); ++ ReplaceNode(Node, Res); ++ return; ++ } ++ break; ++ } + } + + // Select the default instruction. +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index 492339ce2151..1b60bfc3bddb 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -234,11 +234,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction(ISD::BITCAST, VT, Legal); + setOperationAction(ISD::UNDEF, VT, Legal); + +- // FIXME: For BUILD_VECTOR, it is temporarily set to `Legal` here, and it +- // will be `Custom` handled in the future. +- setOperationAction(ISD::BUILD_VECTOR, VT, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); ++ setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + } + for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { + setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); +@@ -265,10 +263,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction(ISD::BITCAST, VT, Legal); + setOperationAction(ISD::UNDEF, VT, Legal); + +- // FIXME: Same as above. 
+- setOperationAction(ISD::BUILD_VECTOR, VT, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); ++ setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + } + for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) { + setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); +@@ -371,10 +368,105 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, + return lowerWRITE_REGISTER(Op, DAG); + case ISD::INSERT_VECTOR_ELT: + return lowerINSERT_VECTOR_ELT(Op, DAG); ++ case ISD::BUILD_VECTOR: ++ return lowerBUILD_VECTOR(Op, DAG); + } + return SDValue(); + } + ++static bool isConstantOrUndef(const SDValue Op) { ++ if (Op->isUndef()) ++ return true; ++ if (isa(Op)) ++ return true; ++ if (isa(Op)) ++ return true; ++ return false; ++} ++ ++static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { ++ for (unsigned i = 0; i < Op->getNumOperands(); ++i) ++ if (isConstantOrUndef(Op->getOperand(i))) ++ return true; ++ return false; ++} ++ ++SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op, ++ SelectionDAG &DAG) const { ++ BuildVectorSDNode *Node = cast(Op); ++ EVT ResTy = Op->getValueType(0); ++ SDLoc DL(Op); ++ APInt SplatValue, SplatUndef; ++ unsigned SplatBitSize; ++ bool HasAnyUndefs; ++ bool Is128Vec = ResTy.is128BitVector(); ++ bool Is256Vec = ResTy.is256BitVector(); ++ ++ if ((!Subtarget.hasExtLSX() || !Is128Vec) && ++ (!Subtarget.hasExtLASX() || !Is256Vec)) ++ return SDValue(); ++ ++ if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, ++ /*MinSplatBits=*/8) && ++ SplatBitSize <= 64) { ++ // We can only cope with 8, 16, 32, or 64-bit elements. ++ if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 && ++ SplatBitSize != 64) ++ return SDValue(); ++ ++ EVT ViaVecTy; ++ ++ switch (SplatBitSize) { ++ default: ++ return SDValue(); ++ case 8: ++ ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8; ++ break; ++ case 16: ++ ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16; ++ break; ++ case 32: ++ ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32; ++ break; ++ case 64: ++ ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64; ++ break; ++ } ++ ++ // SelectionDAG::getConstant will promote SplatValue appropriately. ++ SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy); ++ ++ // Bitcast to the type we originally wanted. ++ if (ViaVecTy != ResTy) ++ Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result); ++ ++ return Result; ++ } ++ ++ if (DAG.isSplatValue(Op, /*AllowUndefs=*/false)) ++ return Op; ++ ++ if (!isConstantOrUndefBUILD_VECTOR(Node)) { ++ // Use INSERT_VECTOR_ELT operations rather than expand to stores. ++ // The resulting code is the same length as the expansion, but it doesn't ++ // use memory operations. 
++ EVT ResTy = Node->getValueType(0); ++ ++ assert(ResTy.isVector()); ++ ++ unsigned NumElts = ResTy.getVectorNumElements(); ++ SDValue Vector = DAG.getUNDEF(ResTy); ++ for (unsigned i = 0; i < NumElts; ++i) { ++ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, ++ Node->getOperand(i), ++ DAG.getConstant(i, DL, Subtarget.getGRLenVT())); ++ } ++ return Vector; ++ } ++ ++ return SDValue(); ++} ++ + SDValue + LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +index 29028ff963d0..111376306374 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +@@ -276,6 +276,7 @@ private: + SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + + bool isFPImmLegal(const APFloat &Imm, EVT VT, + bool ForCodeSize) const override; +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index 475565db15c9..4487152fb42b 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -33,6 +33,13 @@ def lasxsplati32 + def lasxsplati64 + : PatFrag<(ops node:$e0), + (v4i64 (build_vector node:$e0, node:$e0, node:$e0, node:$e0))>; ++def lasxsplatf32 ++ : PatFrag<(ops node:$e0), ++ (v8f32 (build_vector node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0))>; ++def lasxsplatf64 ++ : PatFrag<(ops node:$e0), ++ (v4f64 (build_vector node:$e0, node:$e0, node:$e0, node:$e0))>; + + //===----------------------------------------------------------------------===// + // Instruction class templates +@@ -1411,6 +1418,12 @@ def : Pat<(loongarch_vreplve v8i32:$xj, GRLenVT:$rk), + def : Pat<(loongarch_vreplve v4i64:$xj, GRLenVT:$rk), + (XVREPLVE_D v4i64:$xj, GRLenVT:$rk)>; + ++// XVREPL128VEI_{W/D} ++def : Pat<(lasxsplatf32 FPR32:$fj), ++ (XVREPL128VEI_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)>; ++def : Pat<(lasxsplatf64 FPR64:$fj), ++ (XVREPL128VEI_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)>; ++ + // Loads/Stores + foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in { + defm : LdPat; +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index d8fd132a1c59..deac5015882d 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -141,9 +141,13 @@ def lsxsplati16 : PatFrag<(ops node:$e0), + def lsxsplati32 : PatFrag<(ops node:$e0), + (v4i32 (build_vector node:$e0, node:$e0, + node:$e0, node:$e0))>; +- + def lsxsplati64 : PatFrag<(ops node:$e0), + (v2i64 (build_vector node:$e0, node:$e0))>; ++def lsxsplatf32 : PatFrag<(ops node:$e0), ++ (v4f32 (build_vector node:$e0, node:$e0, ++ node:$e0, node:$e0))>; ++def lsxsplatf64 : PatFrag<(ops node:$e0), ++ (v2f64 (build_vector node:$e0, node:$e0))>; + + def to_valid_timm : SDNodeXForm(N); +@@ -1498,6 +1502,12 @@ def : Pat<(loongarch_vreplve v4i32:$vj, GRLenVT:$rk), + def : Pat<(loongarch_vreplve v2i64:$vj, GRLenVT:$rk), + (VREPLVE_D v2i64:$vj, GRLenVT:$rk)>; + ++// VREPLVEI_{W/D} ++def : Pat<(lsxsplatf32 FPR32:$fj), ++ (VREPLVEI_W (SUBREG_TO_REG 
(i64 0), FPR32:$fj, sub_32), 0)>; ++def : Pat<(lsxsplatf64 FPR64:$fj), ++ (VREPLVEI_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)>; ++ + // Loads/Stores + foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { + defm : LdPat; +diff --git a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll +new file mode 100644 +index 000000000000..6824ab5cda8d +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll +@@ -0,0 +1,551 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @buildvector_v32i8_splat(ptr %dst, i8 %a0) nounwind { ++; CHECK-LABEL: buildvector_v32i8_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvreplgr2vr.b $xr0, $a1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %insert = insertelement <32 x i8> undef, i8 %a0, i8 0 ++ %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ++ store <32 x i8> %splat, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v16i16_splat(ptr %dst, i16 %a0) nounwind { ++; CHECK-LABEL: buildvector_v16i16_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvreplgr2vr.h $xr0, $a1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %insert = insertelement <16 x i16> undef, i16 %a0, i8 0 ++ %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer ++ store <16 x i16> %splat, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v8i32_splat(ptr %dst, i32 %a0) nounwind { ++; CHECK-LABEL: buildvector_v8i32_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %insert = insertelement <8 x i32> undef, i32 %a0, i8 0 ++ %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer ++ store <8 x i32> %splat, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v4i64_splat(ptr %dst, i64 %a0) nounwind { ++; CHECK-LABEL: buildvector_v4i64_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvreplgr2vr.d $xr0, $a1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %insert = insertelement <4 x i64> undef, i64 %a0, i8 0 ++ %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer ++ store <4 x i64> %splat, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v8f32_splat(ptr %dst, float %a0) nounwind { ++; CHECK-LABEL: buildvector_v8f32_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0 ++; CHECK-NEXT: xvrepl128vei.w $xr0, $xr0, 0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %insert = insertelement <8 x float> undef, float %a0, i8 0 ++ %splat = shufflevector <8 x float> %insert, <8 x float> undef, <8 x i32> zeroinitializer ++ store <8 x float> %splat, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v4f64_splat(ptr %dst, double %a0) nounwind { ++; CHECK-LABEL: buildvector_v4f64_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 ++; CHECK-NEXT: xvrepl128vei.d $xr0, $xr0, 0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %insert = insertelement <4 x double> undef, double %a0, i8 0 ++ %splat = shufflevector <4 x double> %insert, <4 x double> undef, <4 x i32> zeroinitializer ++ store <4 x double> %splat, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v32i8_const_splat(ptr %dst) nounwind { 
++; CHECK-LABEL: buildvector_v32i8_const_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrepli.b $xr0, 1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <32 x i8> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v16i16_const_splat(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v16i16_const_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrepli.h $xr0, 1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <16 x i16> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v8i32_const_splat(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v8i32_const_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrepli.w $xr0, 1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <8 x i32> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v4i64_const_splat(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v4i64_const_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrepli.d $xr0, 1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <4 x i64> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v2f32_const_splat(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v2f32_const_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: lu12i.w $a1, 260096 ++; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <8 x float> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v4f64_const_splat(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v4f64_const_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: lu52i.d $a1, $zero, 1023 ++; CHECK-NEXT: xvreplgr2vr.d $xr0, $a1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <4 x double> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v32i8_const(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v32i8_const: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI12_0) ++; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI12_0) ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <32 x i8> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v16i16_const(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v16i16_const: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI13_0) ++; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI13_0) ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <16 x i16> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v8i32_const(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v8i32_const: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI14_0) ++; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI14_0) ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <8 x i32> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v4i64_const(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v4i64_const: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI15_0) ++; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI15_0) ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <4 x i64> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v2f32_const(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v2f32_const: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI16_0) ++; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI16_0) ++; CHECK-NEXT: xvld $xr0, 
$a1, 0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <8 x float> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v4f64_const(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v4f64_const: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI17_0) ++; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI17_0) ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <4 x double> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v32i8(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15, i8 %a16, i8 %a17, i8 %a18, i8 %a19, i8 %a20, i8 %a21, i8 %a22, i8 %a23, i8 %a24, i8 %a25, i8 %a26, i8 %a27, i8 %a28, i8 %a29, i8 %a30, i8 %a31) nounwind { ++; CHECK-LABEL: buildvector_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 0 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a3, 2 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a4, 3 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a5, 4 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a6, 5 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a7, 6 ++; CHECK-NEXT: ld.b $a1, $sp, 0 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 7 ++; CHECK-NEXT: ld.b $a1, $sp, 8 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 8 ++; CHECK-NEXT: ld.b $a1, $sp, 16 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 9 ++; CHECK-NEXT: ld.b $a1, $sp, 24 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 10 ++; CHECK-NEXT: ld.b $a1, $sp, 32 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 11 ++; CHECK-NEXT: ld.b $a1, $sp, 40 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 12 ++; CHECK-NEXT: ld.b $a1, $sp, 48 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 13 ++; CHECK-NEXT: ld.b $a1, $sp, 56 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 14 ++; CHECK-NEXT: ld.b $a1, $sp, 64 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 15 ++; CHECK-NEXT: ld.b $a1, $sp, 72 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 0 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.b $a1, $sp, 80 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 1 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.b $a1, $sp, 88 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 2 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.b $a1, $sp, 96 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 3 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.b $a1, $sp, 104 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 4 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.b $a1, $sp, 112 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 5 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.b $a1, $sp, 120 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 6 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.b $a1, $sp, 128 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 7 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.b $a1, $sp, 136 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: 
vinsgr2vr.b $vr1, $a1, 8 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.b $a1, $sp, 144 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 9 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.b $a1, $sp, 152 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 10 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.b $a1, $sp, 160 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 11 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.b $a1, $sp, 168 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 12 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.b $a1, $sp, 176 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 13 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.b $a1, $sp, 184 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 14 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.b $a1, $sp, 192 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 15 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %ins0 = insertelement <32 x i8> undef, i8 %a0, i32 0 ++ %ins1 = insertelement <32 x i8> %ins0, i8 %a1, i32 1 ++ %ins2 = insertelement <32 x i8> %ins1, i8 %a2, i32 2 ++ %ins3 = insertelement <32 x i8> %ins2, i8 %a3, i32 3 ++ %ins4 = insertelement <32 x i8> %ins3, i8 %a4, i32 4 ++ %ins5 = insertelement <32 x i8> %ins4, i8 %a5, i32 5 ++ %ins6 = insertelement <32 x i8> %ins5, i8 %a6, i32 6 ++ %ins7 = insertelement <32 x i8> %ins6, i8 %a7, i32 7 ++ %ins8 = insertelement <32 x i8> %ins7, i8 %a8, i32 8 ++ %ins9 = insertelement <32 x i8> %ins8, i8 %a9, i32 9 ++ %ins10 = insertelement <32 x i8> %ins9, i8 %a10, i32 10 ++ %ins11 = insertelement <32 x i8> %ins10, i8 %a11, i32 11 ++ %ins12 = insertelement <32 x i8> %ins11, i8 %a12, i32 12 ++ %ins13 = insertelement <32 x i8> %ins12, i8 %a13, i32 13 ++ %ins14 = insertelement <32 x i8> %ins13, i8 %a14, i32 14 ++ %ins15 = insertelement <32 x i8> %ins14, i8 %a15, i32 15 ++ %ins16 = insertelement <32 x i8> %ins15, i8 %a16, i32 16 ++ %ins17 = insertelement <32 x i8> %ins16, i8 %a17, i32 17 ++ %ins18 = insertelement <32 x i8> %ins17, i8 %a18, i32 18 ++ %ins19 = insertelement <32 x i8> %ins18, i8 %a19, i32 19 ++ %ins20 = insertelement <32 x i8> %ins19, i8 %a20, i32 20 ++ %ins21 = insertelement <32 x i8> %ins20, i8 %a21, i32 21 ++ %ins22 = insertelement <32 x i8> %ins21, i8 %a22, i32 22 ++ %ins23 = insertelement <32 x i8> %ins22, i8 %a23, i32 23 ++ %ins24 = insertelement <32 x i8> %ins23, i8 %a24, i32 24 ++ %ins25 = insertelement <32 x i8> %ins24, i8 %a25, i32 25 ++ %ins26 = insertelement <32 x i8> %ins25, i8 %a26, i32 26 ++ %ins27 = insertelement <32 x i8> %ins26, i8 %a27, i32 27 ++ %ins28 = insertelement <32 x i8> %ins27, i8 %a28, i32 28 ++ %ins29 = insertelement <32 x i8> %ins28, i8 %a29, i32 29 ++ %ins30 = insertelement <32 x i8> %ins29, i8 %a30, i32 30 ++ %ins31 = insertelement <32 x i8> %ins30, i8 %a31, i32 31 ++ store <32 x i8> %ins31, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v16i16(ptr %dst, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7, i16 %a8, i16 
%a9, i16 %a10, i16 %a11, i16 %a12, i16 %a13, i16 %a14, i16 %a15) nounwind { ++; CHECK-LABEL: buildvector_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0 ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1 ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a3, 2 ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a4, 3 ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a5, 4 ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a6, 5 ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a7, 6 ++; CHECK-NEXT: ld.h $a1, $sp, 0 ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 7 ++; CHECK-NEXT: ld.h $a1, $sp, 8 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 0 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.h $a1, $sp, 16 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 1 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.h $a1, $sp, 24 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 2 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.h $a1, $sp, 32 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 3 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.h $a1, $sp, 40 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 4 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.h $a1, $sp, 48 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 5 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.h $a1, $sp, 56 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 6 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.h $a1, $sp, 64 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 7 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %ins0 = insertelement <16 x i16> undef, i16 %a0, i32 0 ++ %ins1 = insertelement <16 x i16> %ins0, i16 %a1, i32 1 ++ %ins2 = insertelement <16 x i16> %ins1, i16 %a2, i32 2 ++ %ins3 = insertelement <16 x i16> %ins2, i16 %a3, i32 3 ++ %ins4 = insertelement <16 x i16> %ins3, i16 %a4, i32 4 ++ %ins5 = insertelement <16 x i16> %ins4, i16 %a5, i32 5 ++ %ins6 = insertelement <16 x i16> %ins5, i16 %a6, i32 6 ++ %ins7 = insertelement <16 x i16> %ins6, i16 %a7, i32 7 ++ %ins8 = insertelement <16 x i16> %ins7, i16 %a8, i32 8 ++ %ins9 = insertelement <16 x i16> %ins8, i16 %a9, i32 9 ++ %ins10 = insertelement <16 x i16> %ins9, i16 %a10, i32 10 ++ %ins11 = insertelement <16 x i16> %ins10, i16 %a11, i32 11 ++ %ins12 = insertelement <16 x i16> %ins11, i16 %a12, i32 12 ++ %ins13 = insertelement <16 x i16> %ins12, i16 %a13, i32 13 ++ %ins14 = insertelement <16 x i16> %ins13, i16 %a14, i32 14 ++ %ins15 = insertelement <16 x i16> %ins14, i16 %a15, i32 15 ++ store <16 x i16> %ins15, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v8i32(ptr %dst, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) nounwind { ++; CHECK-LABEL: buildvector_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 0 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a2, 1 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a3, 2 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a4, 3 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a5, 4 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, 
$a6, 5 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a7, 6 ++; CHECK-NEXT: ld.w $a1, $sp, 0 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 7 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %ins0 = insertelement <8 x i32> undef, i32 %a0, i32 0 ++ %ins1 = insertelement <8 x i32> %ins0, i32 %a1, i32 1 ++ %ins2 = insertelement <8 x i32> %ins1, i32 %a2, i32 2 ++ %ins3 = insertelement <8 x i32> %ins2, i32 %a3, i32 3 ++ %ins4 = insertelement <8 x i32> %ins3, i32 %a4, i32 4 ++ %ins5 = insertelement <8 x i32> %ins4, i32 %a5, i32 5 ++ %ins6 = insertelement <8 x i32> %ins5, i32 %a6, i32 6 ++ %ins7 = insertelement <8 x i32> %ins6, i32 %a7, i32 7 ++ store <8 x i32> %ins7, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v4i64(ptr %dst, i64 %a0, i64 %a1, i64 %a2, i64 %a3) nounwind { ++; CHECK-LABEL: buildvector_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 0 ++; CHECK-NEXT: xvinsgr2vr.d $xr0, $a2, 1 ++; CHECK-NEXT: xvinsgr2vr.d $xr0, $a3, 2 ++; CHECK-NEXT: xvinsgr2vr.d $xr0, $a4, 3 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %ins0 = insertelement <4 x i64> undef, i64 %a0, i32 0 ++ %ins1 = insertelement <4 x i64> %ins0, i64 %a1, i32 1 ++ %ins2 = insertelement <4 x i64> %ins1, i64 %a2, i32 2 ++ %ins3 = insertelement <4 x i64> %ins2, i64 %a3, i32 3 ++ store <4 x i64> %ins3, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v8f32(ptr %dst, float %a0, float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7) nounwind { ++; CHECK-LABEL: buildvector_v8f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: movfr2gr.s $a1, $fa0 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 0 ++; CHECK-NEXT: movfr2gr.s $a1, $fa1 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 1 ++; CHECK-NEXT: movfr2gr.s $a1, $fa2 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 2 ++; CHECK-NEXT: movfr2gr.s $a1, $fa3 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 3 ++; CHECK-NEXT: movfr2gr.s $a1, $fa4 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 4 ++; CHECK-NEXT: movfr2gr.s $a1, $fa5 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 5 ++; CHECK-NEXT: movfr2gr.s $a1, $fa6 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 6 ++; CHECK-NEXT: movfr2gr.s $a1, $fa7 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 7 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %ins0 = insertelement <8 x float> undef, float %a0, i32 0 ++ %ins1 = insertelement <8 x float> %ins0, float %a1, i32 1 ++ %ins2 = insertelement <8 x float> %ins1, float %a2, i32 2 ++ %ins3 = insertelement <8 x float> %ins2, float %a3, i32 3 ++ %ins4 = insertelement <8 x float> %ins3, float %a4, i32 4 ++ %ins5 = insertelement <8 x float> %ins4, float %a5, i32 5 ++ %ins6 = insertelement <8 x float> %ins5, float %a6, i32 6 ++ %ins7 = insertelement <8 x float> %ins6, float %a7, i32 7 ++ store <8 x float> %ins7, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v4f64(ptr %dst, double %a0, double %a1, double %a2, double %a3) nounwind { ++; CHECK-LABEL: buildvector_v4f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: movfr2gr.d $a1, $fa0 ++; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 0 ++; CHECK-NEXT: movfr2gr.d $a1, $fa1 ++; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 1 ++; CHECK-NEXT: movfr2gr.d $a1, $fa2 ++; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 2 ++; CHECK-NEXT: movfr2gr.d $a1, $fa3 ++; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 3 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %ins0 = insertelement <4 x double> undef, double %a0, i32 0 ++ %ins1 = insertelement <4 x double> %ins0, double %a1, i32 1 ++ %ins2 = insertelement <4 x double> %ins1, double %a2, i32 2 ++ %ins3 
= insertelement <4 x double> %ins2, double %a3, i32 3 ++ store <4 x double> %ins3, ptr %dst ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll +new file mode 100644 +index 000000000000..3a74db5e1acb +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll +@@ -0,0 +1,376 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @buildvector_v16i8_splat(ptr %dst, i8 %a0) nounwind { ++; CHECK-LABEL: buildvector_v16i8_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vreplgr2vr.b $vr0, $a1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %insert = insertelement <16 x i8> undef, i8 %a0, i8 0 ++ %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ++ store <16 x i8> %splat, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v8i16_splat(ptr %dst, i16 %a0) nounwind { ++; CHECK-LABEL: buildvector_v8i16_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vreplgr2vr.h $vr0, $a1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %insert = insertelement <8 x i16> undef, i16 %a0, i8 0 ++ %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer ++ store <8 x i16> %splat, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v4i32_splat(ptr %dst, i32 %a0) nounwind { ++; CHECK-LABEL: buildvector_v4i32_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vreplgr2vr.w $vr0, $a1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %insert = insertelement <4 x i32> undef, i32 %a0, i8 0 ++ %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer ++ store <4 x i32> %splat, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v2i64_splat(ptr %dst, i64 %a0) nounwind { ++; CHECK-LABEL: buildvector_v2i64_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vreplgr2vr.d $vr0, $a1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %insert = insertelement <2 x i64> undef, i64 %a0, i8 0 ++ %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer ++ store <2 x i64> %splat, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v4f32_splat(ptr %dst, float %a0) nounwind { ++; CHECK-LABEL: buildvector_v4f32_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0 ++; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %insert = insertelement <4 x float> undef, float %a0, i8 0 ++ %splat = shufflevector <4 x float> %insert, <4 x float> undef, <4 x i32> zeroinitializer ++ store <4 x float> %splat, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v2f64_splat(ptr %dst, double %a0) nounwind { ++; CHECK-LABEL: buildvector_v2f64_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 ++; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %insert = insertelement <2 x double> undef, double %a0, i8 0 ++ %splat = shufflevector <2 x double> %insert, <2 x double> undef, <2 x i32> zeroinitializer ++ store <2 x double> %splat, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v16i8_const_splat(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v16i8_const_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vrepli.b $vr0, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; 
CHECK-NEXT: ret ++entry: ++ store <16 x i8> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v8i16_const_splat(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v8i16_const_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vrepli.h $vr0, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <8 x i16> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v4i32_const_splat(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v4i32_const_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vrepli.w $vr0, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <4 x i32> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v2i64_const_splat(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v2i64_const_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vrepli.d $vr0, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <2 x i64> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v2f32_const_splat(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v2f32_const_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: lu12i.w $a1, 260096 ++; CHECK-NEXT: vreplgr2vr.w $vr0, $a1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <4 x float> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v2f64_const_splat(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v2f64_const_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: lu52i.d $a1, $zero, 1023 ++; CHECK-NEXT: vreplgr2vr.d $vr0, $a1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <2 x double> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v16i8_const(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v16i8_const: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI12_0) ++; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI12_0) ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <16 x i8> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v8i16_const(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v8i16_const: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI13_0) ++; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI13_0) ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <8 x i16> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v4i32_const(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v4i32_const: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI14_0) ++; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI14_0) ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <4 x i32> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v2i64_const(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v2i64_const: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI15_0) ++; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI15_0) ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <2 x i64> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v2f32_const(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v2f32_const: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI16_0) ++; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI16_0) ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <4 x float> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v2f64_const(ptr %dst) 
nounwind { ++; CHECK-LABEL: buildvector_v2f64_const: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI17_0) ++; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI17_0) ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <2 x double> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v16i8(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind { ++; CHECK-LABEL: buildvector_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 0 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a3, 2 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a4, 3 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a5, 4 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a6, 5 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a7, 6 ++; CHECK-NEXT: ld.b $a1, $sp, 0 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 7 ++; CHECK-NEXT: ld.b $a1, $sp, 8 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 8 ++; CHECK-NEXT: ld.b $a1, $sp, 16 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 9 ++; CHECK-NEXT: ld.b $a1, $sp, 24 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 10 ++; CHECK-NEXT: ld.b $a1, $sp, 32 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 11 ++; CHECK-NEXT: ld.b $a1, $sp, 40 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 12 ++; CHECK-NEXT: ld.b $a1, $sp, 48 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 13 ++; CHECK-NEXT: ld.b $a1, $sp, 56 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 14 ++; CHECK-NEXT: ld.b $a1, $sp, 64 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 15 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %ins0 = insertelement <16 x i8> undef, i8 %a0, i32 0 ++ %ins1 = insertelement <16 x i8> %ins0, i8 %a1, i32 1 ++ %ins2 = insertelement <16 x i8> %ins1, i8 %a2, i32 2 ++ %ins3 = insertelement <16 x i8> %ins2, i8 %a3, i32 3 ++ %ins4 = insertelement <16 x i8> %ins3, i8 %a4, i32 4 ++ %ins5 = insertelement <16 x i8> %ins4, i8 %a5, i32 5 ++ %ins6 = insertelement <16 x i8> %ins5, i8 %a6, i32 6 ++ %ins7 = insertelement <16 x i8> %ins6, i8 %a7, i32 7 ++ %ins8 = insertelement <16 x i8> %ins7, i8 %a8, i32 8 ++ %ins9 = insertelement <16 x i8> %ins8, i8 %a9, i32 9 ++ %ins10 = insertelement <16 x i8> %ins9, i8 %a10, i32 10 ++ %ins11 = insertelement <16 x i8> %ins10, i8 %a11, i32 11 ++ %ins12 = insertelement <16 x i8> %ins11, i8 %a12, i32 12 ++ %ins13 = insertelement <16 x i8> %ins12, i8 %a13, i32 13 ++ %ins14 = insertelement <16 x i8> %ins13, i8 %a14, i32 14 ++ %ins15 = insertelement <16 x i8> %ins14, i8 %a15, i32 15 ++ store <16 x i8> %ins15, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v8i16(ptr %dst, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind { ++; CHECK-LABEL: buildvector_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0 ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1 ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a3, 2 ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a4, 3 ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a5, 4 ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a6, 5 ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a7, 6 ++; CHECK-NEXT: ld.h $a1, $sp, 0 ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 7 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %ins0 = insertelement <8 x i16> undef, i16 %a0, i32 0 ++ %ins1 = insertelement <8 x i16> %ins0, i16 %a1, i32 1 ++ %ins2 = insertelement <8 x i16> %ins1, i16 %a2, i32 2 ++ %ins3 = insertelement <8 x i16> %ins2, i16 %a3, i32 3 ++ %ins4 = insertelement <8 x i16> %ins3, i16 %a4, i32 4 ++ %ins5 = insertelement <8 x i16> %ins4, i16 %a5, i32 5 ++ %ins6 = 
insertelement <8 x i16> %ins5, i16 %a6, i32 6 ++ %ins7 = insertelement <8 x i16> %ins6, i16 %a7, i32 7 ++ store <8 x i16> %ins7, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v4i32(ptr %dst, i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind { ++; CHECK-LABEL: buildvector_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 0 ++; CHECK-NEXT: vinsgr2vr.w $vr0, $a2, 1 ++; CHECK-NEXT: vinsgr2vr.w $vr0, $a3, 2 ++; CHECK-NEXT: vinsgr2vr.w $vr0, $a4, 3 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %ins0 = insertelement <4 x i32> undef, i32 %a0, i32 0 ++ %ins1 = insertelement <4 x i32> %ins0, i32 %a1, i32 1 ++ %ins2 = insertelement <4 x i32> %ins1, i32 %a2, i32 2 ++ %ins3 = insertelement <4 x i32> %ins2, i32 %a3, i32 3 ++ store <4 x i32> %ins3, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v2i64(ptr %dst, i64 %a0, i64 %a1) nounwind { ++; CHECK-LABEL: buildvector_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 0 ++; CHECK-NEXT: vinsgr2vr.d $vr0, $a2, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %ins0 = insertelement <2 x i64> undef, i64 %a0, i32 0 ++ %ins1 = insertelement <2 x i64> %ins0, i64 %a1, i32 1 ++ store <2 x i64> %ins1, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v4f32(ptr %dst, float %a0, float %a1, float %a2, float %a3) nounwind { ++; CHECK-LABEL: buildvector_v4f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: movfr2gr.s $a1, $fa0 ++; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 0 ++; CHECK-NEXT: movfr2gr.s $a1, $fa1 ++; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 1 ++; CHECK-NEXT: movfr2gr.s $a1, $fa2 ++; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 2 ++; CHECK-NEXT: movfr2gr.s $a1, $fa3 ++; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 3 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %ins0 = insertelement <4 x float> undef, float %a0, i32 0 ++ %ins1 = insertelement <4 x float> %ins0, float %a1, i32 1 ++ %ins2 = insertelement <4 x float> %ins1, float %a2, i32 2 ++ %ins3 = insertelement <4 x float> %ins2, float %a3, i32 3 ++ store <4 x float> %ins3, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v2f64(ptr %dst, double %a0, double %a1) nounwind { ++; CHECK-LABEL: buildvector_v2f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: movfr2gr.d $a1, $fa0 ++; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 0 ++; CHECK-NEXT: movfr2gr.d $a1, $fa1 ++; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %ins0 = insertelement <2 x double> undef, double %a0, i32 0 ++ %ins1 = insertelement <2 x double> %ins0, double %a1, i32 1 ++ store <2 x double> %ins1, ptr %dst ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll +index 5060240cd8b1..d0be9cb7e3c8 100644 +--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll +@@ -180,10 +180,9 @@ entry: + define void @mul_v16i8_17(ptr %res, ptr %a0) nounwind { + ; CHECK-LABEL: mul_v16i8_17: + ; CHECK: # %bb.0: # %entry +-; CHECK-NEXT: ori $a2, $zero, 17 +-; CHECK-NEXT: vreplgr2vr.b $vr0, $a2 +-; CHECK-NEXT: vld $vr1, $a1, 0 +-; CHECK-NEXT: vmul.b $vr0, $vr1, $vr0 ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vrepli.b $vr1, 17 ++; CHECK-NEXT: vmul.b $vr0, $vr0, $vr1 + ; CHECK-NEXT: vst $vr0, $a0, 0 + ; CHECK-NEXT: ret + entry: +@@ -196,10 +195,9 @@ entry: + define void @mul_v8i16_17(ptr %res, ptr %a0) nounwind { + ; CHECK-LABEL: mul_v8i16_17: + ; CHECK: # %bb.0: # %entry +-; 
CHECK-NEXT: ori $a2, $zero, 17 +-; CHECK-NEXT: vreplgr2vr.h $vr0, $a2 +-; CHECK-NEXT: vld $vr1, $a1, 0 +-; CHECK-NEXT: vmul.h $vr0, $vr1, $vr0 ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vrepli.h $vr1, 17 ++; CHECK-NEXT: vmul.h $vr0, $vr0, $vr1 + ; CHECK-NEXT: vst $vr0, $a0, 0 + ; CHECK-NEXT: ret + entry: +@@ -212,10 +210,9 @@ entry: + define void @mul_v4i32_17(ptr %res, ptr %a0) nounwind { + ; CHECK-LABEL: mul_v4i32_17: + ; CHECK: # %bb.0: # %entry +-; CHECK-NEXT: ori $a2, $zero, 17 +-; CHECK-NEXT: vreplgr2vr.w $vr0, $a2 +-; CHECK-NEXT: vld $vr1, $a1, 0 +-; CHECK-NEXT: vmul.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vrepli.w $vr1, 17 ++; CHECK-NEXT: vmul.w $vr0, $vr0, $vr1 + ; CHECK-NEXT: vst $vr0, $a0, 0 + ; CHECK-NEXT: ret + entry: +@@ -228,10 +225,9 @@ entry: + define void @mul_v2i64_17(ptr %res, ptr %a0) nounwind { + ; CHECK-LABEL: mul_v2i64_17: + ; CHECK: # %bb.0: # %entry +-; CHECK-NEXT: ori $a2, $zero, 17 +-; CHECK-NEXT: vreplgr2vr.d $vr0, $a2 +-; CHECK-NEXT: vld $vr1, $a1, 0 +-; CHECK-NEXT: vmul.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vrepli.d $vr1, 17 ++; CHECK-NEXT: vmul.d $vr0, $vr0, $vr1 + ; CHECK-NEXT: vst $vr0, $a0, 0 + ; CHECK-NEXT: ret + entry: +-- +2.20.1 + diff --git a/0022-LoongArch-Disable-mulodi4-and-muloti4-libcalls-73199.patch b/0022-LoongArch-Disable-mulodi4-and-muloti4-libcalls-73199.patch new file mode 100644 index 0000000..818899c --- /dev/null +++ b/0022-LoongArch-Disable-mulodi4-and-muloti4-libcalls-73199.patch @@ -0,0 +1,528 @@ +From a506e3a165025f8f4088132f2f1082cd31e47b6c Mon Sep 17 00:00:00 2001 +From: hev +Date: Thu, 23 Nov 2023 19:34:50 +0800 +Subject: [PATCH 22/27] [LoongArch] Disable mulodi4 and muloti4 libcalls + (#73199) + +This library function only exists in compiler-rt not libgcc. So this +would fail to link unless we were linking with compiler-rt. + +Fixes https://github.com/ClangBuiltLinux/linux/issues/1958 + +(cherry picked from commit 0d9f557b6c36da3aa92daff4c0d37ea821d7ae1e) +--- + .../LoongArch/LoongArchISelLowering.cpp | 5 + + .../CodeGen/LoongArch/smul-with-overflow.ll | 463 +++++++++++++++--- + 2 files changed, 397 insertions(+), 71 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index f7eacd56c542..ed106cb766bc 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -152,8 +152,13 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + + // Set libcalls. + setLibcallName(RTLIB::MUL_I128, nullptr); ++ // The MULO libcall is not part of libgcc, only compiler-rt. ++ setLibcallName(RTLIB::MULO_I64, nullptr); + } + ++ // The MULO libcall is not part of libgcc, only compiler-rt. 
++ setLibcallName(RTLIB::MULO_I128, nullptr); ++ + static const ISD::CondCode FPCCToExpand[] = { + ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE, + ISD::SETGE, ISD::SETNE, ISD::SETGT}; +diff --git a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll +index a53e77e5aa4b..6cba4108d63c 100644 +--- a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll ++++ b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll +@@ -5,23 +5,53 @@ + define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) { + ; LA32-LABEL: smuloi64: + ; LA32: # %bb.0: +-; LA32-NEXT: addi.w $sp, $sp, -16 +-; LA32-NEXT: .cfi_def_cfa_offset 16 +-; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +-; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill +-; LA32-NEXT: .cfi_offset 1, -4 +-; LA32-NEXT: .cfi_offset 22, -8 +-; LA32-NEXT: move $fp, $a4 +-; LA32-NEXT: st.w $zero, $sp, 4 +-; LA32-NEXT: addi.w $a4, $sp, 4 +-; LA32-NEXT: bl %plt(__mulodi4) +-; LA32-NEXT: st.w $a1, $fp, 4 +-; LA32-NEXT: st.w $a0, $fp, 0 +-; LA32-NEXT: ld.w $a0, $sp, 4 ++; LA32-NEXT: srai.w $a5, $a1, 31 ++; LA32-NEXT: mul.w $a6, $a2, $a5 ++; LA32-NEXT: mulh.wu $a7, $a2, $a5 ++; LA32-NEXT: add.w $a7, $a7, $a6 ++; LA32-NEXT: mul.w $a5, $a3, $a5 ++; LA32-NEXT: add.w $a5, $a7, $a5 ++; LA32-NEXT: srai.w $a7, $a3, 31 ++; LA32-NEXT: mul.w $t0, $a7, $a1 ++; LA32-NEXT: mulh.wu $t1, $a7, $a0 ++; LA32-NEXT: add.w $t0, $t1, $t0 ++; LA32-NEXT: mul.w $a7, $a7, $a0 ++; LA32-NEXT: add.w $t0, $t0, $a7 ++; LA32-NEXT: add.w $a5, $t0, $a5 ++; LA32-NEXT: mulh.wu $t0, $a0, $a2 ++; LA32-NEXT: mul.w $t1, $a1, $a2 ++; LA32-NEXT: add.w $t0, $t1, $t0 ++; LA32-NEXT: sltu $t1, $t0, $t1 ++; LA32-NEXT: mulh.wu $t2, $a1, $a2 ++; LA32-NEXT: add.w $t1, $t2, $t1 ++; LA32-NEXT: mul.w $t2, $a0, $a3 ++; LA32-NEXT: add.w $t0, $t2, $t0 ++; LA32-NEXT: sltu $t2, $t0, $t2 ++; LA32-NEXT: mulh.wu $t3, $a0, $a3 ++; LA32-NEXT: add.w $t2, $t3, $t2 ++; LA32-NEXT: add.w $a6, $a7, $a6 ++; LA32-NEXT: sltu $a7, $a6, $a7 ++; LA32-NEXT: add.w $a5, $a5, $a7 ++; LA32-NEXT: mul.w $a0, $a0, $a2 ++; LA32-NEXT: mul.w $a2, $a1, $a3 ++; LA32-NEXT: mulh.wu $a1, $a1, $a3 ++; LA32-NEXT: add.w $a3, $t1, $t2 ++; LA32-NEXT: sltu $a7, $a3, $t1 ++; LA32-NEXT: add.w $a1, $a1, $a7 ++; LA32-NEXT: st.w $a0, $a4, 0 ++; LA32-NEXT: add.w $a0, $a2, $a3 ++; LA32-NEXT: sltu $a2, $a0, $a2 ++; LA32-NEXT: add.w $a1, $a1, $a2 ++; LA32-NEXT: st.w $t0, $a4, 4 ++; LA32-NEXT: add.w $a1, $a1, $a5 ++; LA32-NEXT: add.w $a2, $a0, $a6 ++; LA32-NEXT: sltu $a0, $a2, $a0 ++; LA32-NEXT: add.w $a0, $a1, $a0 ++; LA32-NEXT: srai.w $a1, $t0, 31 ++; LA32-NEXT: xor $a0, $a0, $a1 ++; LA32-NEXT: xor $a1, $a2, $a1 ++; LA32-NEXT: or $a0, $a1, $a0 + ; LA32-NEXT: sltu $a0, $zero, $a0 +-; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload +-; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +-; LA32-NEXT: addi.w $sp, $sp, 16 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: smuloi64: +@@ -43,69 +73,360 @@ define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) { + define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) { + ; LA32-LABEL: smuloi128: + ; LA32: # %bb.0: +-; LA32-NEXT: addi.w $sp, $sp, -64 +-; LA32-NEXT: .cfi_def_cfa_offset 64 +-; LA32-NEXT: st.w $ra, $sp, 60 # 4-byte Folded Spill +-; LA32-NEXT: st.w $fp, $sp, 56 # 4-byte Folded Spill ++; LA32-NEXT: addi.w $sp, $sp, -96 ++; LA32-NEXT: .cfi_def_cfa_offset 96 ++; LA32-NEXT: st.w $ra, $sp, 92 # 4-byte Folded Spill ++; LA32-NEXT: st.w $fp, $sp, 88 # 4-byte Folded Spill ++; LA32-NEXT: st.w $s0, $sp, 84 # 4-byte Folded Spill ++; LA32-NEXT: st.w $s1, $sp, 80 # 4-byte 
Folded Spill ++; LA32-NEXT: st.w $s2, $sp, 76 # 4-byte Folded Spill ++; LA32-NEXT: st.w $s3, $sp, 72 # 4-byte Folded Spill ++; LA32-NEXT: st.w $s4, $sp, 68 # 4-byte Folded Spill ++; LA32-NEXT: st.w $s5, $sp, 64 # 4-byte Folded Spill ++; LA32-NEXT: st.w $s6, $sp, 60 # 4-byte Folded Spill ++; LA32-NEXT: st.w $s7, $sp, 56 # 4-byte Folded Spill ++; LA32-NEXT: st.w $s8, $sp, 52 # 4-byte Folded Spill + ; LA32-NEXT: .cfi_offset 1, -4 + ; LA32-NEXT: .cfi_offset 22, -8 +-; LA32-NEXT: move $fp, $a2 +-; LA32-NEXT: st.w $zero, $sp, 52 +-; LA32-NEXT: ld.w $a2, $a1, 12 +-; LA32-NEXT: st.w $a2, $sp, 12 +-; LA32-NEXT: ld.w $a2, $a1, 8 +-; LA32-NEXT: st.w $a2, $sp, 8 +-; LA32-NEXT: ld.w $a2, $a1, 4 +-; LA32-NEXT: st.w $a2, $sp, 4 +-; LA32-NEXT: ld.w $a1, $a1, 0 +-; LA32-NEXT: st.w $a1, $sp, 0 +-; LA32-NEXT: ld.w $a1, $a0, 12 +-; LA32-NEXT: st.w $a1, $sp, 28 +-; LA32-NEXT: ld.w $a1, $a0, 8 +-; LA32-NEXT: st.w $a1, $sp, 24 +-; LA32-NEXT: ld.w $a1, $a0, 4 +-; LA32-NEXT: st.w $a1, $sp, 20 +-; LA32-NEXT: ld.w $a0, $a0, 0 +-; LA32-NEXT: st.w $a0, $sp, 16 +-; LA32-NEXT: addi.w $a0, $sp, 32 +-; LA32-NEXT: addi.w $a1, $sp, 16 +-; LA32-NEXT: addi.w $a2, $sp, 0 +-; LA32-NEXT: addi.w $a3, $sp, 52 +-; LA32-NEXT: bl %plt(__muloti4) +-; LA32-NEXT: ld.w $a0, $sp, 44 +-; LA32-NEXT: st.w $a0, $fp, 12 +-; LA32-NEXT: ld.w $a0, $sp, 40 +-; LA32-NEXT: st.w $a0, $fp, 8 +-; LA32-NEXT: ld.w $a0, $sp, 36 +-; LA32-NEXT: st.w $a0, $fp, 4 +-; LA32-NEXT: ld.w $a0, $sp, 32 +-; LA32-NEXT: st.w $a0, $fp, 0 +-; LA32-NEXT: ld.w $a0, $sp, 52 ++; LA32-NEXT: .cfi_offset 23, -12 ++; LA32-NEXT: .cfi_offset 24, -16 ++; LA32-NEXT: .cfi_offset 25, -20 ++; LA32-NEXT: .cfi_offset 26, -24 ++; LA32-NEXT: .cfi_offset 27, -28 ++; LA32-NEXT: .cfi_offset 28, -32 ++; LA32-NEXT: .cfi_offset 29, -36 ++; LA32-NEXT: .cfi_offset 30, -40 ++; LA32-NEXT: .cfi_offset 31, -44 ++; LA32-NEXT: st.w $a2, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: ld.w $a6, $a1, 0 ++; LA32-NEXT: ld.w $a7, $a0, 0 ++; LA32-NEXT: mulh.wu $a3, $a7, $a6 ++; LA32-NEXT: ld.w $a5, $a0, 4 ++; LA32-NEXT: mul.w $a4, $a5, $a6 ++; LA32-NEXT: add.w $a3, $a4, $a3 ++; LA32-NEXT: sltu $a4, $a3, $a4 ++; LA32-NEXT: mulh.wu $t0, $a5, $a6 ++; LA32-NEXT: add.w $a4, $t0, $a4 ++; LA32-NEXT: ld.w $t0, $a1, 4 ++; LA32-NEXT: mul.w $t1, $a7, $t0 ++; LA32-NEXT: add.w $a3, $t1, $a3 ++; LA32-NEXT: st.w $a3, $sp, 44 # 4-byte Folded Spill ++; LA32-NEXT: sltu $t1, $a3, $t1 ++; LA32-NEXT: mulh.wu $t2, $a7, $t0 ++; LA32-NEXT: add.w $t1, $t2, $t1 ++; LA32-NEXT: ld.w $t4, $a0, 12 ++; LA32-NEXT: ld.w $t2, $a0, 8 ++; LA32-NEXT: ld.w $t3, $a1, 8 ++; LA32-NEXT: mulh.wu $a0, $t2, $t3 ++; LA32-NEXT: mul.w $t5, $t4, $t3 ++; LA32-NEXT: add.w $a0, $t5, $a0 ++; LA32-NEXT: sltu $t5, $a0, $t5 ++; LA32-NEXT: mulh.wu $t6, $t4, $t3 ++; LA32-NEXT: add.w $t5, $t6, $t5 ++; LA32-NEXT: ld.w $t7, $a1, 12 ++; LA32-NEXT: mul.w $a1, $t2, $t7 ++; LA32-NEXT: add.w $a0, $a1, $a0 ++; LA32-NEXT: st.w $a0, $sp, 48 # 4-byte Folded Spill ++; LA32-NEXT: sltu $a1, $a0, $a1 ++; LA32-NEXT: mulh.wu $t6, $t2, $t7 ++; LA32-NEXT: add.w $t6, $t6, $a1 ++; LA32-NEXT: srai.w $s7, $t4, 31 ++; LA32-NEXT: mul.w $a1, $s7, $t7 ++; LA32-NEXT: mulh.wu $t8, $s7, $t3 ++; LA32-NEXT: add.w $t8, $t8, $a1 ++; LA32-NEXT: mulh.wu $fp, $a6, $s7 ++; LA32-NEXT: mul.w $s6, $t0, $s7 ++; LA32-NEXT: add.w $s8, $s6, $fp ++; LA32-NEXT: mul.w $a1, $a6, $s7 ++; LA32-NEXT: add.w $ra, $a1, $s8 ++; LA32-NEXT: sltu $s0, $ra, $a1 ++; LA32-NEXT: add.w $a0, $fp, $s0 ++; LA32-NEXT: add.w $a3, $a4, $t1 ++; LA32-NEXT: st.w $a3, $sp, 20 # 4-byte Folded Spill ++; LA32-NEXT: sltu $a4, $a3, $a4 ++; 
LA32-NEXT: mulh.wu $t1, $a5, $t0 ++; LA32-NEXT: add.w $a3, $t1, $a4 ++; LA32-NEXT: st.w $a3, $sp, 28 # 4-byte Folded Spill ++; LA32-NEXT: srai.w $s4, $t7, 31 ++; LA32-NEXT: mul.w $fp, $a7, $s4 ++; LA32-NEXT: mulh.wu $a4, $a7, $s4 ++; LA32-NEXT: add.w $s1, $a4, $fp ++; LA32-NEXT: sltu $s0, $s1, $fp ++; LA32-NEXT: add.w $s5, $a4, $s0 ++; LA32-NEXT: mul.w $a4, $s7, $t3 ++; LA32-NEXT: add.w $t8, $t8, $a4 ++; LA32-NEXT: add.w $s0, $ra, $t8 ++; LA32-NEXT: add.w $a3, $a1, $a4 ++; LA32-NEXT: st.w $a3, $sp, 32 # 4-byte Folded Spill ++; LA32-NEXT: sltu $a4, $a3, $a1 ++; LA32-NEXT: add.w $a3, $s0, $a4 ++; LA32-NEXT: st.w $a3, $sp, 24 # 4-byte Folded Spill ++; LA32-NEXT: add.w $s3, $t5, $t6 ++; LA32-NEXT: sltu $a4, $s3, $t5 ++; LA32-NEXT: mulh.wu $t5, $t4, $t7 ++; LA32-NEXT: add.w $a3, $t5, $a4 ++; LA32-NEXT: st.w $a3, $sp, 16 # 4-byte Folded Spill ++; LA32-NEXT: mul.w $a4, $a7, $a6 ++; LA32-NEXT: st.w $a4, $a2, 0 ++; LA32-NEXT: sltu $a4, $s8, $s6 ++; LA32-NEXT: mulh.wu $t5, $t0, $s7 ++; LA32-NEXT: add.w $a4, $t5, $a4 ++; LA32-NEXT: add.w $t1, $a4, $a0 ++; LA32-NEXT: sltu $a4, $t1, $a4 ++; LA32-NEXT: add.w $s2, $t5, $a4 ++; LA32-NEXT: mulh.wu $a4, $a7, $t3 ++; LA32-NEXT: mul.w $t5, $a5, $t3 ++; LA32-NEXT: add.w $a4, $t5, $a4 ++; LA32-NEXT: sltu $t5, $a4, $t5 ++; LA32-NEXT: mulh.wu $t6, $a5, $t3 ++; LA32-NEXT: add.w $a3, $t6, $t5 ++; LA32-NEXT: mul.w $t6, $a7, $t7 ++; LA32-NEXT: add.w $t5, $t6, $a4 ++; LA32-NEXT: sltu $a4, $t5, $t6 ++; LA32-NEXT: mulh.wu $t6, $a7, $t7 ++; LA32-NEXT: add.w $a4, $t6, $a4 ++; LA32-NEXT: mulh.wu $t6, $t2, $a6 ++; LA32-NEXT: mul.w $s7, $t4, $a6 ++; LA32-NEXT: add.w $t6, $s7, $t6 ++; LA32-NEXT: sltu $s7, $t6, $s7 ++; LA32-NEXT: mulh.wu $s8, $t4, $a6 ++; LA32-NEXT: add.w $a0, $s8, $s7 ++; LA32-NEXT: mul.w $s7, $t2, $t0 ++; LA32-NEXT: add.w $t6, $s7, $t6 ++; LA32-NEXT: sltu $s7, $t6, $s7 ++; LA32-NEXT: mulh.wu $s8, $t2, $t0 ++; LA32-NEXT: add.w $a2, $s8, $s7 ++; LA32-NEXT: mul.w $s8, $a5, $s4 ++; LA32-NEXT: add.w $s7, $s1, $s8 ++; LA32-NEXT: add.w $s1, $s7, $ra ++; LA32-NEXT: add.w $a1, $fp, $a1 ++; LA32-NEXT: st.w $a1, $sp, 40 # 4-byte Folded Spill ++; LA32-NEXT: sltu $ra, $a1, $fp ++; LA32-NEXT: add.w $a1, $s1, $ra ++; LA32-NEXT: st.w $a1, $sp, 36 # 4-byte Folded Spill ++; LA32-NEXT: xor $s0, $a1, $s7 ++; LA32-NEXT: sltui $s0, $s0, 1 ++; LA32-NEXT: sltu $a1, $a1, $s7 ++; LA32-NEXT: masknez $s1, $a1, $s0 ++; LA32-NEXT: maskeqz $s0, $ra, $s0 ++; LA32-NEXT: add.w $t1, $s6, $t1 ++; LA32-NEXT: sltu $s6, $t1, $s6 ++; LA32-NEXT: add.w $s2, $s2, $s6 ++; LA32-NEXT: add.w $a2, $a0, $a2 ++; LA32-NEXT: sltu $a0, $a2, $a0 ++; LA32-NEXT: mulh.wu $s6, $t4, $t0 ++; LA32-NEXT: add.w $t8, $s6, $a0 ++; LA32-NEXT: add.w $a4, $a3, $a4 ++; LA32-NEXT: sltu $a3, $a4, $a3 ++; LA32-NEXT: mulh.wu $s6, $a5, $t7 ++; LA32-NEXT: add.w $a3, $s6, $a3 ++; LA32-NEXT: mul.w $s6, $t4, $t7 ++; LA32-NEXT: mul.w $t7, $a5, $t7 ++; LA32-NEXT: mul.w $ra, $t4, $t0 ++; LA32-NEXT: mul.w $t0, $a5, $t0 ++; LA32-NEXT: mul.w $t4, $t4, $s4 ++; LA32-NEXT: mul.w $a7, $a7, $t3 ++; LA32-NEXT: mul.w $a6, $t2, $a6 ++; LA32-NEXT: mul.w $t3, $t2, $t3 ++; LA32-NEXT: mul.w $a0, $t2, $s4 ++; LA32-NEXT: mulh.wu $t2, $t2, $s4 ++; LA32-NEXT: mulh.wu $a5, $s4, $a5 ++; LA32-NEXT: sltu $s4, $s7, $s8 ++; LA32-NEXT: add.w $s4, $a5, $s4 ++; LA32-NEXT: add.w $s4, $s5, $s4 ++; LA32-NEXT: sltu $s5, $s4, $s5 ++; LA32-NEXT: add.w $s5, $a5, $s5 ++; LA32-NEXT: ld.w $a1, $sp, 20 # 4-byte Folded Reload ++; LA32-NEXT: add.w $a1, $t0, $a1 ++; LA32-NEXT: sltu $a5, $a1, $t0 ++; LA32-NEXT: ld.w $t0, $sp, 28 # 4-byte Folded Reload ++; LA32-NEXT: add.w $t0, 
$t0, $a5 ++; LA32-NEXT: or $s0, $s0, $s1 ++; LA32-NEXT: add.w $a4, $t7, $a4 ++; LA32-NEXT: sltu $a5, $a4, $t7 ++; LA32-NEXT: add.w $t7, $a3, $a5 ++; LA32-NEXT: add.w $s1, $ra, $a2 ++; LA32-NEXT: sltu $a2, $s1, $ra ++; LA32-NEXT: add.w $t8, $t8, $a2 ++; LA32-NEXT: add.w $a5, $s6, $s3 ++; LA32-NEXT: sltu $a2, $a5, $s6 ++; LA32-NEXT: ld.w $a3, $sp, 16 # 4-byte Folded Reload ++; LA32-NEXT: add.w $a2, $a3, $a2 ++; LA32-NEXT: ld.w $s6, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $a3, $sp, 44 # 4-byte Folded Reload ++; LA32-NEXT: st.w $a3, $s6, 4 ++; LA32-NEXT: ld.w $a3, $sp, 24 # 4-byte Folded Reload ++; LA32-NEXT: add.w $a3, $s2, $a3 ++; LA32-NEXT: ld.w $s2, $sp, 32 # 4-byte Folded Reload ++; LA32-NEXT: add.w $s2, $t1, $s2 ++; LA32-NEXT: sltu $t1, $s2, $t1 ++; LA32-NEXT: add.w $a3, $a3, $t1 ++; LA32-NEXT: add.w $t1, $s8, $s4 ++; LA32-NEXT: sltu $s3, $t1, $s8 ++; LA32-NEXT: add.w $s3, $s5, $s3 ++; LA32-NEXT: add.w $t2, $t2, $a0 ++; LA32-NEXT: add.w $t2, $t2, $t4 ++; LA32-NEXT: add.w $t2, $t2, $s7 ++; LA32-NEXT: add.w $t4, $a0, $fp ++; LA32-NEXT: sltu $a0, $t4, $a0 ++; LA32-NEXT: add.w $a0, $t2, $a0 ++; LA32-NEXT: add.w $a0, $s3, $a0 ++; LA32-NEXT: add.w $t2, $t1, $t4 ++; LA32-NEXT: sltu $t1, $t2, $t1 ++; LA32-NEXT: add.w $a0, $a0, $t1 ++; LA32-NEXT: add.w $a0, $a0, $a3 ++; LA32-NEXT: add.w $t1, $t2, $s2 ++; LA32-NEXT: sltu $a3, $t1, $t2 ++; LA32-NEXT: add.w $a0, $a0, $a3 ++; LA32-NEXT: add.w $a3, $t6, $t0 ++; LA32-NEXT: add.w $a1, $a6, $a1 ++; LA32-NEXT: sltu $a6, $a1, $a6 ++; LA32-NEXT: add.w $t0, $a3, $a6 ++; LA32-NEXT: add.w $a1, $a7, $a1 ++; LA32-NEXT: sltu $a7, $a1, $a7 ++; LA32-NEXT: add.w $a3, $t5, $t0 ++; LA32-NEXT: add.w $a3, $a3, $a7 ++; LA32-NEXT: sltu $t2, $a3, $t5 ++; LA32-NEXT: xor $t4, $a3, $t5 ++; LA32-NEXT: sltui $t4, $t4, 1 ++; LA32-NEXT: masknez $t2, $t2, $t4 ++; LA32-NEXT: maskeqz $a7, $a7, $t4 ++; LA32-NEXT: st.w $a1, $s6, 8 ++; LA32-NEXT: or $a1, $a7, $t2 ++; LA32-NEXT: sltu $a7, $t0, $t6 ++; LA32-NEXT: xor $t0, $t0, $t6 ++; LA32-NEXT: sltui $t0, $t0, 1 ++; LA32-NEXT: masknez $a7, $a7, $t0 ++; LA32-NEXT: maskeqz $a6, $a6, $t0 ++; LA32-NEXT: or $a6, $a6, $a7 ++; LA32-NEXT: add.w $a6, $s1, $a6 ++; LA32-NEXT: sltu $a7, $a6, $s1 ++; LA32-NEXT: add.w $a7, $t8, $a7 ++; LA32-NEXT: add.w $a1, $a4, $a1 ++; LA32-NEXT: sltu $a4, $a1, $a4 ++; LA32-NEXT: add.w $a4, $t7, $a4 ++; LA32-NEXT: add.w $t0, $t1, $s0 ++; LA32-NEXT: sltu $t1, $t0, $t1 ++; LA32-NEXT: add.w $a0, $a0, $t1 ++; LA32-NEXT: st.w $a3, $s6, 12 ++; LA32-NEXT: add.w $a1, $a6, $a1 ++; LA32-NEXT: sltu $a6, $a1, $a6 ++; LA32-NEXT: add.w $a4, $a7, $a4 ++; LA32-NEXT: add.w $a4, $a4, $a6 ++; LA32-NEXT: sltu $t1, $a4, $a7 ++; LA32-NEXT: xor $a7, $a4, $a7 ++; LA32-NEXT: sltui $a7, $a7, 1 ++; LA32-NEXT: masknez $t1, $t1, $a7 ++; LA32-NEXT: maskeqz $a6, $a6, $a7 ++; LA32-NEXT: or $a6, $a6, $t1 ++; LA32-NEXT: add.w $a6, $a5, $a6 ++; LA32-NEXT: sltu $a5, $a6, $a5 ++; LA32-NEXT: add.w $a2, $a2, $a5 ++; LA32-NEXT: ld.w $t1, $sp, 48 # 4-byte Folded Reload ++; LA32-NEXT: add.w $a4, $t1, $a4 ++; LA32-NEXT: add.w $a1, $t3, $a1 ++; LA32-NEXT: sltu $a5, $a1, $t3 ++; LA32-NEXT: add.w $a4, $a4, $a5 ++; LA32-NEXT: sltu $a7, $a4, $t1 ++; LA32-NEXT: xor $t1, $a4, $t1 ++; LA32-NEXT: sltui $t1, $t1, 1 ++; LA32-NEXT: masknez $a7, $a7, $t1 ++; LA32-NEXT: maskeqz $a5, $a5, $t1 ++; LA32-NEXT: or $a5, $a5, $a7 ++; LA32-NEXT: add.w $a5, $a6, $a5 ++; LA32-NEXT: sltu $a6, $a5, $a6 ++; LA32-NEXT: add.w $a2, $a2, $a6 ++; LA32-NEXT: add.w $a0, $a2, $a0 ++; LA32-NEXT: add.w $a2, $a5, $t0 ++; LA32-NEXT: sltu $a5, $a2, $a5 ++; LA32-NEXT: add.w $a0, $a0, $a5 
++; LA32-NEXT: ld.w $a5, $sp, 40 # 4-byte Folded Reload ++; LA32-NEXT: add.w $a5, $a1, $a5 ++; LA32-NEXT: sltu $a1, $a5, $a1 ++; LA32-NEXT: ld.w $a6, $sp, 36 # 4-byte Folded Reload ++; LA32-NEXT: add.w $a6, $a4, $a6 ++; LA32-NEXT: add.w $a6, $a6, $a1 ++; LA32-NEXT: sltu $a7, $a6, $a4 ++; LA32-NEXT: xor $a4, $a6, $a4 ++; LA32-NEXT: sltui $a4, $a4, 1 ++; LA32-NEXT: masknez $a7, $a7, $a4 ++; LA32-NEXT: maskeqz $a1, $a1, $a4 ++; LA32-NEXT: or $a1, $a1, $a7 ++; LA32-NEXT: add.w $a1, $a2, $a1 ++; LA32-NEXT: sltu $a2, $a1, $a2 ++; LA32-NEXT: add.w $a0, $a0, $a2 ++; LA32-NEXT: srai.w $a2, $a3, 31 ++; LA32-NEXT: xor $a3, $a6, $a2 ++; LA32-NEXT: xor $a0, $a0, $a2 ++; LA32-NEXT: or $a0, $a3, $a0 ++; LA32-NEXT: xor $a3, $a5, $a2 ++; LA32-NEXT: xor $a1, $a1, $a2 ++; LA32-NEXT: or $a1, $a3, $a1 ++; LA32-NEXT: or $a0, $a1, $a0 + ; LA32-NEXT: sltu $a0, $zero, $a0 +-; LA32-NEXT: ld.w $fp, $sp, 56 # 4-byte Folded Reload +-; LA32-NEXT: ld.w $ra, $sp, 60 # 4-byte Folded Reload +-; LA32-NEXT: addi.w $sp, $sp, 64 ++; LA32-NEXT: ld.w $s8, $sp, 52 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $s7, $sp, 56 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $s6, $sp, 60 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $s5, $sp, 64 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $s4, $sp, 68 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $s3, $sp, 72 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $s2, $sp, 76 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $s1, $sp, 80 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $s0, $sp, 84 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $fp, $sp, 88 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $ra, $sp, 92 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 96 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: smuloi128: + ; LA64: # %bb.0: +-; LA64-NEXT: addi.d $sp, $sp, -32 +-; LA64-NEXT: .cfi_def_cfa_offset 32 +-; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +-; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill +-; LA64-NEXT: .cfi_offset 1, -8 +-; LA64-NEXT: .cfi_offset 22, -16 +-; LA64-NEXT: move $fp, $a4 +-; LA64-NEXT: st.d $zero, $sp, 8 +-; LA64-NEXT: addi.d $a4, $sp, 8 +-; LA64-NEXT: bl %plt(__muloti4) +-; LA64-NEXT: st.d $a1, $fp, 8 +-; LA64-NEXT: st.d $a0, $fp, 0 +-; LA64-NEXT: ld.d $a0, $sp, 8 ++; LA64-NEXT: srai.d $a5, $a1, 63 ++; LA64-NEXT: mul.d $a6, $a2, $a5 ++; LA64-NEXT: mulh.du $a7, $a2, $a5 ++; LA64-NEXT: add.d $a7, $a7, $a6 ++; LA64-NEXT: mul.d $a5, $a3, $a5 ++; LA64-NEXT: add.d $a5, $a7, $a5 ++; LA64-NEXT: srai.d $a7, $a3, 63 ++; LA64-NEXT: mul.d $t0, $a7, $a1 ++; LA64-NEXT: mulh.du $t1, $a7, $a0 ++; LA64-NEXT: add.d $t0, $t1, $t0 ++; LA64-NEXT: mul.d $a7, $a7, $a0 ++; LA64-NEXT: add.d $t0, $t0, $a7 ++; LA64-NEXT: add.d $a5, $t0, $a5 ++; LA64-NEXT: mulh.du $t0, $a0, $a2 ++; LA64-NEXT: mul.d $t1, $a1, $a2 ++; LA64-NEXT: add.d $t0, $t1, $t0 ++; LA64-NEXT: sltu $t1, $t0, $t1 ++; LA64-NEXT: mulh.du $t2, $a1, $a2 ++; LA64-NEXT: add.d $t1, $t2, $t1 ++; LA64-NEXT: mul.d $t2, $a0, $a3 ++; LA64-NEXT: add.d $t0, $t2, $t0 ++; LA64-NEXT: sltu $t2, $t0, $t2 ++; LA64-NEXT: mulh.du $t3, $a0, $a3 ++; LA64-NEXT: add.d $t2, $t3, $t2 ++; LA64-NEXT: add.d $a6, $a7, $a6 ++; LA64-NEXT: sltu $a7, $a6, $a7 ++; LA64-NEXT: add.d $a5, $a5, $a7 ++; LA64-NEXT: mul.d $a0, $a0, $a2 ++; LA64-NEXT: mul.d $a2, $a1, $a3 ++; LA64-NEXT: mulh.du $a1, $a1, $a3 ++; LA64-NEXT: add.d $a3, $t1, $t2 ++; LA64-NEXT: sltu $a7, $a3, $t1 ++; LA64-NEXT: add.d $a1, $a1, $a7 ++; LA64-NEXT: st.d $a0, $a4, 0 ++; LA64-NEXT: add.d $a0, $a2, $a3 ++; LA64-NEXT: sltu $a2, $a0, $a2 ++; LA64-NEXT: add.d $a1, $a1, $a2 ++; LA64-NEXT: st.d $t0, $a4, 8 ++; LA64-NEXT: 
add.d $a1, $a1, $a5 ++; LA64-NEXT: add.d $a2, $a0, $a6 ++; LA64-NEXT: sltu $a0, $a2, $a0 ++; LA64-NEXT: add.d $a0, $a1, $a0 ++; LA64-NEXT: srai.d $a1, $t0, 63 ++; LA64-NEXT: xor $a0, $a0, $a1 ++; LA64-NEXT: xor $a1, $a2, $a1 ++; LA64-NEXT: or $a0, $a1, $a0 + ; LA64-NEXT: sltu $a0, $zero, $a0 +-; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +-; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +-; LA64-NEXT: addi.d $sp, $sp, 32 + ; LA64-NEXT: ret + %t = call {i128, i1} @llvm.smul.with.overflow.i128(i128 %v1, i128 %v2) + %val = extractvalue {i128, i1} %t, 0 +-- +2.20.1 + diff --git a/0022-LoongArch-Fix-test-issue-of-init-loongarch.c.patch b/0022-LoongArch-Fix-test-issue-of-init-loongarch.c.patch new file mode 100644 index 0000000..b0faf1d --- /dev/null +++ b/0022-LoongArch-Fix-test-issue-of-init-loongarch.c.patch @@ -0,0 +1,26 @@ +From 06b7b3c7b121871e6f7e4f113f956683c6fdd642 Mon Sep 17 00:00:00 2001 +From: Ami-zhang +Date: Tue, 23 Jul 2024 15:20:30 +0800 +Subject: [PATCH 22/23] [LoongArch] Fix test issue of init-loongarch.c + +(cherry picked from commit d59925c39856f255f4dd4427ccc650f2c2692a24) +--- + clang/test/Preprocessor/init-loongarch.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/clang/test/Preprocessor/init-loongarch.c b/clang/test/Preprocessor/init-loongarch.c +index 7ce3d2de8c78..887b6d6af7e1 100644 +--- a/clang/test/Preprocessor/init-loongarch.c ++++ b/clang/test/Preprocessor/init-loongarch.c +@@ -825,7 +825,7 @@ + // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la664 | \ + // RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la664 -DTUNE=la664 %s + // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -mtune=la664 | \ +-// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=la664 %s ++// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=la664 %s + // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -mtune=la664 | \ + // RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=la664 %s + // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la664 -mtune=loongarch64 | \ +-- +2.20.1 + diff --git a/0023-LoongArch-Add-more-and-or-xor-patterns-for-vector-ty.patch b/0023-LoongArch-Add-more-and-or-xor-patterns-for-vector-ty.patch new file mode 100644 index 0000000..f1d5044 --- /dev/null +++ b/0023-LoongArch-Add-more-and-or-xor-patterns-for-vector-ty.patch @@ -0,0 +1,896 @@ +From cfcec815edf0c38df1e25b28de76c1b1f7e35238 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Tue, 14 Nov 2023 17:58:52 +0800 +Subject: [PATCH 23/42] [LoongArch] Add more and/or/xor patterns for vector + types + +(cherry picked from commit ca66df3b021017fedf08f0779f5bfc7898dbdd29) + +--- + .../LoongArch/LoongArchLASXInstrInfo.td | 21 +-- + .../Target/LoongArch/LoongArchLSXInstrInfo.td | 21 +-- + .../LoongArch/lasx/ir-instruction/and.ll | 125 ++++++++++++++++++ + .../LoongArch/lasx/ir-instruction/or.ll | 125 ++++++++++++++++++ + .../LoongArch/lasx/ir-instruction/xor.ll | 125 ++++++++++++++++++ + .../LoongArch/lsx/ir-instruction/and.ll | 125 ++++++++++++++++++ + .../LoongArch/lsx/ir-instruction/or.ll | 125 ++++++++++++++++++ + .../LoongArch/lsx/ir-instruction/xor.ll | 125 ++++++++++++++++++ + 8 files changed, 774 insertions(+), 18 deletions(-) + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll + create mode 100644 
llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll + +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index 4487152fb42b..a5652472481a 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -1184,10 +1184,6 @@ multiclass PatShiftXrUimm { + (!cast(Inst#"_D") LASX256:$xj, uimm6:$imm)>; + } + +-class PatXrXrB +- : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), +- (Inst LASX256:$xj, LASX256:$xk)>; +- + let Predicates = [HasExtLASX] in { + + // XVADD_{B/H/W/D} +@@ -1235,13 +1231,20 @@ defm : PatXrXr; + defm : PatXrXrU; + + // XVAND_V +-def : PatXrXrB; +-// XVNOR_V +-def : PatXrXrB; ++foreach vt = [v32i8, v16i16, v8i32, v4i64] in ++def : Pat<(and (vt LASX256:$xj), (vt LASX256:$xk)), ++ (XVAND_V LASX256:$xj, LASX256:$xk)>; ++// XVOR_V ++foreach vt = [v32i8, v16i16, v8i32, v4i64] in ++def : Pat<(or (vt LASX256:$xj), (vt LASX256:$xk)), ++ (XVOR_V LASX256:$xj, LASX256:$xk)>; + // XVXOR_V +-def : PatXrXrB; ++foreach vt = [v32i8, v16i16, v8i32, v4i64] in ++def : Pat<(xor (vt LASX256:$xj), (vt LASX256:$xk)), ++ (XVXOR_V LASX256:$xj, LASX256:$xk)>; + // XVNOR_V +-def : Pat<(vnot (or (v32i8 LASX256:$xj), (v32i8 LASX256:$xk))), ++foreach vt = [v32i8, v16i16, v8i32, v4i64] in ++def : Pat<(vnot (or (vt LASX256:$xj), (vt LASX256:$xk))), + (XVNOR_V LASX256:$xj, LASX256:$xk)>; + + // XVANDI_B +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index deac5015882d..5645ce51194a 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -1261,10 +1261,6 @@ multiclass PatShiftVrUimm { + (!cast(Inst#"_D") LSX128:$vj, uimm6:$imm)>; + } + +-class PatVrVrB +- : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), +- (Inst LSX128:$vj, LSX128:$vk)>; +- + let Predicates = [HasExtLSX] in { + + // VADD_{B/H/W/D} +@@ -1312,13 +1308,20 @@ defm : PatVrVr; + defm : PatVrVrU; + + // VAND_V +-def : PatVrVrB; +-// VNOR_V +-def : PatVrVrB; ++foreach vt = [v16i8, v8i16, v4i32, v2i64] in ++def : Pat<(and (vt LSX128:$vj), (vt LSX128:$vk)), ++ (VAND_V LSX128:$vj, LSX128:$vk)>; ++// VOR_V ++foreach vt = [v16i8, v8i16, v4i32, v2i64] in ++def : Pat<(or (vt LSX128:$vj), (vt LSX128:$vk)), ++ (VOR_V LSX128:$vj, LSX128:$vk)>; + // VXOR_V +-def : PatVrVrB; ++foreach vt = [v16i8, v8i16, v4i32, v2i64] in ++def : Pat<(xor (vt LSX128:$vj), (vt LSX128:$vk)), ++ (VXOR_V LSX128:$vj, LSX128:$vk)>; + // VNOR_V +-def : Pat<(vnot (or (v16i8 LSX128:$vj), (v16i8 LSX128:$vk))), ++foreach vt = [v16i8, v8i16, v4i32, v2i64] in ++def : Pat<(vnot (or (vt LSX128:$vj), (vt LSX128:$vk))), + (VNOR_V LSX128:$vj, LSX128:$vk)>; + + // VANDI_B +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll +new file mode 100644 +index 000000000000..98c87cadeeb5 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll +@@ -0,0 +1,125 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @and_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; 
CHECK-LABEL: and_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvand.v $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %v2 = and <32 x i8> %v0, %v1 ++ store <32 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @and_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: and_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvand.v $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %v2 = and <16 x i16> %v0, %v1 ++ store <16 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @and_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: and_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvand.v $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %v2 = and <8 x i32> %v0, %v1 ++ store <8 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @and_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: and_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvand.v $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %v2 = and <4 x i64> %v0, %v1 ++ store <4 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @and_u_v32i8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: and_u_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvandi.b $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = and <32 x i8> %v0, ++ store <32 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @and_u_v16i16(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: and_u_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvrepli.h $xr1, 31 ++; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = and <16 x i16> %v0, ++ store <16 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @and_u_v8i32(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: and_u_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvrepli.w $xr1, 31 ++; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = and <8 x i32> %v0, ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @and_u_v4i64(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: and_u_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvrepli.d $xr1, 31 ++; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = and <4 x i64> %v0, ++ store <4 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll +new file mode 100644 +index 000000000000..f37cbf1cefed +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll +@@ -0,0 
+1,125 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @or_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: or_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %v2 = or <32 x i8> %v0, %v1 ++ store <32 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @or_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: or_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %v2 = or <16 x i16> %v0, %v1 ++ store <16 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @or_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: or_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %v2 = or <8 x i32> %v0, %v1 ++ store <8 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @or_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: or_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %v2 = or <4 x i64> %v0, %v1 ++ store <4 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @or_u_v32i8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: or_u_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvori.b $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = or <32 x i8> %v0, ++ store <32 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @or_u_v16i16(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: or_u_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvrepli.h $xr1, 31 ++; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = or <16 x i16> %v0, ++ store <16 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @or_u_v8i32(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: or_u_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvrepli.w $xr1, 31 ++; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = or <8 x i32> %v0, ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @or_u_v4i64(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: or_u_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvrepli.d $xr1, 31 ++; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = or <4 x i64> %v0, ++ store <4 x i64> %v1, ptr %res ++ ret void ++} +diff --git 
a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll +new file mode 100644 +index 000000000000..c2fb1462b7a2 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll +@@ -0,0 +1,125 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @xor_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: xor_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvxor.v $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %v2 = xor <32 x i8> %v0, %v1 ++ store <32 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @xor_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: xor_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvxor.v $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %v2 = xor <16 x i16> %v0, %v1 ++ store <16 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @xor_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: xor_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvxor.v $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %v2 = xor <8 x i32> %v0, %v1 ++ store <8 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @xor_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: xor_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvxor.v $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %v2 = xor <4 x i64> %v0, %v1 ++ store <4 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @xor_u_v32i8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: xor_u_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvxori.b $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = xor <32 x i8> %v0, ++ store <32 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @xor_u_v16i16(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: xor_u_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvrepli.h $xr1, 31 ++; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = xor <16 x i16> %v0, ++ store <16 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @xor_u_v8i32(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: xor_u_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvrepli.w $xr1, 31 ++; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = xor <8 x i32> %v0, ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @xor_u_v4i64(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: xor_u_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; 
CHECK-NEXT: xvrepli.d $xr1, 31 ++; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = xor <4 x i64> %v0, ++ store <4 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll +new file mode 100644 +index 000000000000..523255159a81 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll +@@ -0,0 +1,125 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @and_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: and_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %v2 = and <16 x i8> %v0, %v1 ++ store <16 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @and_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: and_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %v2 = and <8 x i16> %v0, %v1 ++ store <8 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @and_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: and_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %v2 = and <4 x i32> %v0, %v1 ++ store <4 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @and_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: and_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %v2 = and <2 x i64> %v0, %v1 ++ store <2 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @and_u_v16i8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: and_u_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vandi.b $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = and <16 x i8> %v0, ++ store <16 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @and_u_v8i16(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: and_u_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vrepli.h $vr1, 31 ++; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = and <8 x i16> %v0, ++ store <8 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @and_u_v4i32(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: and_u_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vrepli.w $vr1, 31 ++; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = and <4 x i32> 
%v0, ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @and_u_v2i64(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: and_u_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vrepli.d $vr1, 31 ++; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = and <2 x i64> %v0, ++ store <2 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll +new file mode 100644 +index 000000000000..f124512acce7 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll +@@ -0,0 +1,125 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @or_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: or_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %v2 = or <16 x i8> %v0, %v1 ++ store <16 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @or_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: or_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %v2 = or <8 x i16> %v0, %v1 ++ store <8 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @or_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: or_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %v2 = or <4 x i32> %v0, %v1 ++ store <4 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @or_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: or_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %v2 = or <2 x i64> %v0, %v1 ++ store <2 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @or_u_v16i8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: or_u_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vori.b $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = or <16 x i8> %v0, ++ store <16 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @or_u_v8i16(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: or_u_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vrepli.h $vr1, 31 ++; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = or <8 x i16> %v0, ++ store <8 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @or_u_v4i32(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: or_u_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: 
vrepli.w $vr1, 31 ++; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = or <4 x i32> %v0, ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @or_u_v2i64(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: or_u_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vrepli.d $vr1, 31 ++; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = or <2 x i64> %v0, ++ store <2 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll +new file mode 100644 +index 000000000000..ce3e49c990ff +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll +@@ -0,0 +1,125 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @xor_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: xor_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %v2 = xor <16 x i8> %v0, %v1 ++ store <16 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @xor_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: xor_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %v2 = xor <8 x i16> %v0, %v1 ++ store <8 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @xor_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: xor_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %v2 = xor <4 x i32> %v0, %v1 ++ store <4 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @xor_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: xor_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %v2 = xor <2 x i64> %v0, %v1 ++ store <2 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @xor_u_v16i8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: xor_u_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vxori.b $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = xor <16 x i8> %v0, ++ store <16 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @xor_u_v8i16(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: xor_u_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vrepli.h $vr1, 31 ++; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = xor <8 x i16> %v0, ++ store <8 x i16> 
%v1, ptr %res ++ ret void ++} ++ ++define void @xor_u_v4i32(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: xor_u_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vrepli.w $vr1, 31 ++; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = xor <4 x i32> %v0, ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @xor_u_v2i64(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: xor_u_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vrepli.d $vr1, 31 ++; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = xor <2 x i64> %v0, ++ store <2 x i64> %v1, ptr %res ++ ret void ++} +-- +2.20.1 + diff --git a/0023-LoongArch-Fix-pattern-for-FNMSUB_-S-D-instructions-7.patch b/0023-LoongArch-Fix-pattern-for-FNMSUB_-S-D-instructions-7.patch new file mode 100644 index 0000000..bd28a29 --- /dev/null +++ b/0023-LoongArch-Fix-pattern-for-FNMSUB_-S-D-instructions-7.patch @@ -0,0 +1,732 @@ +From 92f1e37509b7ef4829302213743a0f3c464c3e79 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Wed, 29 Nov 2023 15:21:21 +0800 +Subject: [PATCH 23/27] [LoongArch] Fix pattern for FNMSUB_{S/D} instructions + (#73742) + +``` +when a=c=-0.0, b=0.0: +-(a * b + (-c)) = -0.0 +-a * b + c = 0.0 +(fneg (fma a, b (-c))) != (fma (fneg a), b ,c) +``` + +See https://reviews.llvm.org/D90901 for a similar discussion on X86. + +(cherry picked from commit 5e7e0d603204ede803323a825318e365a87f73e9) +--- + .../LoongArch/LoongArchFloat32InstrInfo.td | 8 +- + .../LoongArch/LoongArchFloat64InstrInfo.td | 6 +- + llvm/test/CodeGen/LoongArch/double-fma.ll | 259 ++++++++++++++++-- + llvm/test/CodeGen/LoongArch/float-fma.ll | 259 ++++++++++++++++-- + 4 files changed, 483 insertions(+), 49 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td +index 826db54febd3..65120c083f49 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td +@@ -294,8 +294,12 @@ def : Pat<(fneg (fma FPR32:$fj, FPR32:$fk, FPR32:$fa)), + def : Pat<(fma_nsz (fneg FPR32:$fj), FPR32:$fk, (fneg FPR32:$fa)), + (FNMADD_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>; + +-// fnmsub.s: -fj * fk + fa +-def : Pat<(fma (fneg FPR32:$fj), FPR32:$fk, FPR32:$fa), ++// fnmsub.s: -(fj * fk - fa) ++def : Pat<(fneg (fma FPR32:$fj, FPR32:$fk, (fneg FPR32:$fa))), ++ (FNMSUB_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>; ++ ++// fnmsub.s: -fj * fk + fa (the nsz flag on the FMA) ++def : Pat<(fma_nsz (fneg FPR32:$fj), FPR32:$fk, FPR32:$fa), + (FNMSUB_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>; + } // Predicates = [HasBasicF] + +diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td +index 5118474725b6..437c1e4d7be2 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td +@@ -256,7 +256,11 @@ def : Pat<(fma_nsz (fneg FPR64:$fj), FPR64:$fk, (fneg FPR64:$fa)), + (FNMADD_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>; + + // fnmsub.d: -(fj * fk - fa) +-def : Pat<(fma (fneg FPR64:$fj), FPR64:$fk, FPR64:$fa), ++def : Pat<(fneg (fma FPR64:$fj, FPR64:$fk, (fneg FPR64:$fa))), ++ (FNMSUB_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>; ++ ++// fnmsub.d: -fj * fk + fa (the nsz flag on the FMA) ++def : Pat<(fma_nsz (fneg 
FPR64:$fj), FPR64:$fk, FPR64:$fa), + (FNMSUB_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>; + } // Predicates = [HasBasicD] + +diff --git a/llvm/test/CodeGen/LoongArch/double-fma.ll b/llvm/test/CodeGen/LoongArch/double-fma.ll +index 6dd628479433..58d20c62a668 100644 +--- a/llvm/test/CodeGen/LoongArch/double-fma.ll ++++ b/llvm/test/CodeGen/LoongArch/double-fma.ll +@@ -236,13 +236,15 @@ define double @fnmsub_d(double %a, double %b, double %c) nounwind { + ; LA32-CONTRACT-ON-LABEL: fnmsub_d: + ; LA32-CONTRACT-ON: # %bb.0: + ; LA32-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 +-; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 ++; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa0, $fa2 ++; LA32-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 + ; LA32-CONTRACT-ON-NEXT: ret + ; + ; LA32-CONTRACT-OFF-LABEL: fnmsub_d: + ; LA32-CONTRACT-OFF: # %bb.0: + ; LA32-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 +-; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 ++; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa0, $fa2 ++; LA32-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 + ; LA32-CONTRACT-OFF-NEXT: ret + ; + ; LA64-CONTRACT-FAST-LABEL: fnmsub_d: +@@ -253,12 +255,98 @@ define double @fnmsub_d(double %a, double %b, double %c) nounwind { + ; LA64-CONTRACT-ON-LABEL: fnmsub_d: + ; LA64-CONTRACT-ON: # %bb.0: + ; LA64-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 +-; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 ++; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa0, $fa2 ++; LA64-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 + ; LA64-CONTRACT-ON-NEXT: ret + ; + ; LA64-CONTRACT-OFF-LABEL: fnmsub_d: + ; LA64-CONTRACT-OFF: # %bb.0: + ; LA64-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 ++; LA64-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa0, $fa2 ++; LA64-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 ++; LA64-CONTRACT-OFF-NEXT: ret ++ %negc = fneg double %c ++ %mul = fmul double %a, %b ++ %add = fadd double %mul, %negc ++ %neg = fneg double %add ++ ret double %neg ++} ++ ++define double @fnmsub_d_nsz(double %a, double %b, double %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: fnmsub_d_nsz: ++; LA32-CONTRACT-FAST: # %bb.0: ++; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-FAST-NEXT: ret ++; ++; LA32-CONTRACT-ON-LABEL: fnmsub_d_nsz: ++; LA32-CONTRACT-ON: # %bb.0: ++; LA32-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 ++; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 ++; LA32-CONTRACT-ON-NEXT: ret ++; ++; LA32-CONTRACT-OFF-LABEL: fnmsub_d_nsz: ++; LA32-CONTRACT-OFF: # %bb.0: ++; LA32-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 ++; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 ++; LA32-CONTRACT-OFF-NEXT: ret ++; ++; LA64-CONTRACT-FAST-LABEL: fnmsub_d_nsz: ++; LA64-CONTRACT-FAST: # %bb.0: ++; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-FAST-NEXT: ret ++; ++; LA64-CONTRACT-ON-LABEL: fnmsub_d_nsz: ++; LA64-CONTRACT-ON: # %bb.0: ++; LA64-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 ++; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 ++; LA64-CONTRACT-ON-NEXT: ret ++; ++; LA64-CONTRACT-OFF-LABEL: fnmsub_d_nsz: ++; LA64-CONTRACT-OFF: # %bb.0: ++; LA64-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 ++; LA64-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 ++; LA64-CONTRACT-OFF-NEXT: ret ++ %nega = fneg nsz double %a ++ %mul = fmul nsz double %nega, %b ++ %add = fadd nsz double %mul, %c ++ ret double %add ++} ++ ++;; Check that fnmsub.d is not emitted. 
++define double @not_fnmsub_d(double %a, double %b, double %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: not_fnmsub_d: ++; LA32-CONTRACT-FAST: # %bb.0: ++; LA32-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 ++; LA32-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-FAST-NEXT: ret ++; ++; LA32-CONTRACT-ON-LABEL: not_fnmsub_d: ++; LA32-CONTRACT-ON: # %bb.0: ++; LA32-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 ++; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 ++; LA32-CONTRACT-ON-NEXT: ret ++; ++; LA32-CONTRACT-OFF-LABEL: not_fnmsub_d: ++; LA32-CONTRACT-OFF: # %bb.0: ++; LA32-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 ++; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 ++; LA32-CONTRACT-OFF-NEXT: ret ++; ++; LA64-CONTRACT-FAST-LABEL: not_fnmsub_d: ++; LA64-CONTRACT-FAST: # %bb.0: ++; LA64-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 ++; LA64-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-FAST-NEXT: ret ++; ++; LA64-CONTRACT-ON-LABEL: not_fnmsub_d: ++; LA64-CONTRACT-ON: # %bb.0: ++; LA64-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 ++; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 ++; LA64-CONTRACT-ON-NEXT: ret ++; ++; LA64-CONTRACT-OFF-LABEL: not_fnmsub_d: ++; LA64-CONTRACT-OFF: # %bb.0: ++; LA64-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 + ; LA64-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 + ; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg double %a +@@ -483,6 +571,86 @@ define double @contract_fnmsub_d(double %a, double %b, double %c) nounwind { + ; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_d: + ; LA64-CONTRACT-OFF: # %bb.0: + ; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-OFF-NEXT: ret ++ %negc = fneg contract double %c ++ %mul = fmul contract double %a, %b ++ %add = fadd contract double %mul, %negc ++ %neg = fneg contract double %add ++ ret double %neg ++} ++ ++define double @contract_fnmsub_d_nsz(double %a, double %b, double %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: contract_fnmsub_d_nsz: ++; LA32-CONTRACT-FAST: # %bb.0: ++; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-FAST-NEXT: ret ++; ++; LA32-CONTRACT-ON-LABEL: contract_fnmsub_d_nsz: ++; LA32-CONTRACT-ON: # %bb.0: ++; LA32-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-ON-NEXT: ret ++; ++; LA32-CONTRACT-OFF-LABEL: contract_fnmsub_d_nsz: ++; LA32-CONTRACT-OFF: # %bb.0: ++; LA32-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-OFF-NEXT: ret ++; ++; LA64-CONTRACT-FAST-LABEL: contract_fnmsub_d_nsz: ++; LA64-CONTRACT-FAST: # %bb.0: ++; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-FAST-NEXT: ret ++; ++; LA64-CONTRACT-ON-LABEL: contract_fnmsub_d_nsz: ++; LA64-CONTRACT-ON: # %bb.0: ++; LA64-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-ON-NEXT: ret ++; ++; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_d_nsz: ++; LA64-CONTRACT-OFF: # %bb.0: ++; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-OFF-NEXT: ret ++ %nega = fneg contract nsz double %a ++ %mul = fmul contract nsz double %nega, %b ++ %add = fadd contract nsz double %mul, %c ++ ret double %add ++} ++ ++;; Check that fnmsub.d is not emitted. 
++define double @not_contract_fnmsub_d(double %a, double %b, double %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: not_contract_fnmsub_d: ++; LA32-CONTRACT-FAST: # %bb.0: ++; LA32-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 ++; LA32-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-FAST-NEXT: ret ++; ++; LA32-CONTRACT-ON-LABEL: not_contract_fnmsub_d: ++; LA32-CONTRACT-ON: # %bb.0: ++; LA32-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 ++; LA32-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-ON-NEXT: ret ++; ++; LA32-CONTRACT-OFF-LABEL: not_contract_fnmsub_d: ++; LA32-CONTRACT-OFF: # %bb.0: ++; LA32-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 ++; LA32-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-OFF-NEXT: ret ++; ++; LA64-CONTRACT-FAST-LABEL: not_contract_fnmsub_d: ++; LA64-CONTRACT-FAST: # %bb.0: ++; LA64-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 ++; LA64-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-FAST-NEXT: ret ++; ++; LA64-CONTRACT-ON-LABEL: not_contract_fnmsub_d: ++; LA64-CONTRACT-ON: # %bb.0: ++; LA64-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 ++; LA64-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-ON-NEXT: ret ++; ++; LA64-CONTRACT-OFF-LABEL: not_contract_fnmsub_d: ++; LA64-CONTRACT-OFF: # %bb.0: ++; LA64-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 ++; LA64-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg contract double %a + %mul = fmul contract double %nega, %b +@@ -592,8 +760,8 @@ define double @fnmadd_d_intrinsics(double %a, double %b, double %c) nounwind { + ; LA64-CONTRACT-OFF-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-OFF-NEXT: ret + %fma = call double @llvm.fma.f64(double %a, double %b, double %c) +- %neg = fneg double %fma +- ret double %neg ++ %negfma = fneg double %fma ++ ret double %negfma + } + + define double @fnmadd_d_nsz_intrinsics(double %a, double %b, double %c) nounwind { +@@ -704,44 +872,87 @@ define double @fnmsub_d_intrinsics(double %a, double %b, double %c) nounwind { + ; LA64-CONTRACT-OFF-LABEL: fnmsub_d_intrinsics: + ; LA64-CONTRACT-OFF: # %bb.0: + ; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-OFF-NEXT: ret ++ %negc = fneg double %c ++ %fma = call double @llvm.fma.f64(double %a, double %b, double %negc) ++ %negfma = fneg double %fma ++ ret double %negfma ++} ++ ++define double @fnmsub_d_nsz_intrinsics(double %a, double %b, double %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: fnmsub_d_nsz_intrinsics: ++; LA32-CONTRACT-FAST: # %bb.0: ++; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-FAST-NEXT: ret ++; ++; LA32-CONTRACT-ON-LABEL: fnmsub_d_nsz_intrinsics: ++; LA32-CONTRACT-ON: # %bb.0: ++; LA32-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-ON-NEXT: ret ++; ++; LA32-CONTRACT-OFF-LABEL: fnmsub_d_nsz_intrinsics: ++; LA32-CONTRACT-OFF: # %bb.0: ++; LA32-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-OFF-NEXT: ret ++; ++; LA64-CONTRACT-FAST-LABEL: fnmsub_d_nsz_intrinsics: ++; LA64-CONTRACT-FAST: # %bb.0: ++; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-FAST-NEXT: ret ++; ++; LA64-CONTRACT-ON-LABEL: fnmsub_d_nsz_intrinsics: ++; LA64-CONTRACT-ON: # %bb.0: ++; LA64-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-ON-NEXT: ret ++; ++; LA64-CONTRACT-OFF-LABEL: fnmsub_d_nsz_intrinsics: ++; LA64-CONTRACT-OFF: # %bb.0: ++; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-OFF-NEXT: 
ret + %nega = fneg double %a +- %fma = call double @llvm.fma.f64(double %nega, double %b, double %c) ++ %fma = call nsz double @llvm.fma.f64(double %nega, double %b, double %c) + ret double %fma + } + +-define double @fnmsub_d_swap_intrinsics(double %a, double %b, double %c) nounwind { +-; LA32-CONTRACT-FAST-LABEL: fnmsub_d_swap_intrinsics: ++;; Check that fnmsub.d is not emitted. ++define double @not_fnmsub_d_intrinsics(double %a, double %b, double %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: not_fnmsub_d_intrinsics: + ; LA32-CONTRACT-FAST: # %bb.0: +-; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 ++; LA32-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 ++; LA32-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 + ; LA32-CONTRACT-FAST-NEXT: ret + ; +-; LA32-CONTRACT-ON-LABEL: fnmsub_d_swap_intrinsics: ++; LA32-CONTRACT-ON-LABEL: not_fnmsub_d_intrinsics: + ; LA32-CONTRACT-ON: # %bb.0: +-; LA32-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 ++; LA32-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 ++; LA32-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 + ; LA32-CONTRACT-ON-NEXT: ret + ; +-; LA32-CONTRACT-OFF-LABEL: fnmsub_d_swap_intrinsics: ++; LA32-CONTRACT-OFF-LABEL: not_fnmsub_d_intrinsics: + ; LA32-CONTRACT-OFF: # %bb.0: +-; LA32-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 ++; LA32-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 ++; LA32-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 + ; LA32-CONTRACT-OFF-NEXT: ret + ; +-; LA64-CONTRACT-FAST-LABEL: fnmsub_d_swap_intrinsics: ++; LA64-CONTRACT-FAST-LABEL: not_fnmsub_d_intrinsics: + ; LA64-CONTRACT-FAST: # %bb.0: +-; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 ++; LA64-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 ++; LA64-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-FAST-NEXT: ret + ; +-; LA64-CONTRACT-ON-LABEL: fnmsub_d_swap_intrinsics: ++; LA64-CONTRACT-ON-LABEL: not_fnmsub_d_intrinsics: + ; LA64-CONTRACT-ON: # %bb.0: +-; LA64-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 ++; LA64-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 ++; LA64-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-ON-NEXT: ret + ; +-; LA64-CONTRACT-OFF-LABEL: fnmsub_d_swap_intrinsics: ++; LA64-CONTRACT-OFF-LABEL: not_fnmsub_d_intrinsics: + ; LA64-CONTRACT-OFF: # %bb.0: +-; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 ++; LA64-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 ++; LA64-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-OFF-NEXT: ret +- %negb = fneg double %b +- %fma = call double @llvm.fma.f64(double %a, double %negb, double %c) ++ %nega = fneg double %a ++ %fma = call double @llvm.fma.f64(double %nega, double %b, double %c) + ret double %fma + } + +@@ -882,6 +1093,8 @@ define double @fnmsub_d_contract(double %a, double %b, double %c) nounwind { + ; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-OFF-NEXT: ret + %mul = fmul contract double %a, %b +- %sub = fsub contract double %c, %mul +- ret double %sub ++ %negc = fneg contract double %c ++ %add = fadd contract double %negc, %mul ++ %negadd = fneg contract double %add ++ ret double %negadd + } +diff --git a/llvm/test/CodeGen/LoongArch/float-fma.ll b/llvm/test/CodeGen/LoongArch/float-fma.ll +index 54dc56784006..c236255d971a 100644 +--- a/llvm/test/CodeGen/LoongArch/float-fma.ll ++++ b/llvm/test/CodeGen/LoongArch/float-fma.ll +@@ -236,13 +236,15 @@ define float @fnmsub_s(float %a, float %b, float %c) nounwind { + ; LA32-CONTRACT-ON-LABEL: fnmsub_s: + ; LA32-CONTRACT-ON: # %bb.0: + ; LA32-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 
+-; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 ++; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa0, $fa2 ++; LA32-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 + ; LA32-CONTRACT-ON-NEXT: ret + ; + ; LA32-CONTRACT-OFF-LABEL: fnmsub_s: + ; LA32-CONTRACT-OFF: # %bb.0: + ; LA32-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 +-; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 ++; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa0, $fa2 ++; LA32-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 + ; LA32-CONTRACT-OFF-NEXT: ret + ; + ; LA64-CONTRACT-FAST-LABEL: fnmsub_s: +@@ -253,12 +255,98 @@ define float @fnmsub_s(float %a, float %b, float %c) nounwind { + ; LA64-CONTRACT-ON-LABEL: fnmsub_s: + ; LA64-CONTRACT-ON: # %bb.0: + ; LA64-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 +-; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 ++; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa0, $fa2 ++; LA64-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 + ; LA64-CONTRACT-ON-NEXT: ret + ; + ; LA64-CONTRACT-OFF-LABEL: fnmsub_s: + ; LA64-CONTRACT-OFF: # %bb.0: + ; LA64-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 ++; LA64-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa0, $fa2 ++; LA64-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 ++; LA64-CONTRACT-OFF-NEXT: ret ++ %negc = fneg float %c ++ %mul = fmul float %a, %b ++ %add = fadd float %mul, %negc ++ %neg = fneg float %add ++ ret float %neg ++} ++ ++define float @fnmsub_s_nsz(float %a, float %b, float %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: fnmsub_s_nsz: ++; LA32-CONTRACT-FAST: # %bb.0: ++; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-FAST-NEXT: ret ++; ++; LA32-CONTRACT-ON-LABEL: fnmsub_s_nsz: ++; LA32-CONTRACT-ON: # %bb.0: ++; LA32-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 ++; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 ++; LA32-CONTRACT-ON-NEXT: ret ++; ++; LA32-CONTRACT-OFF-LABEL: fnmsub_s_nsz: ++; LA32-CONTRACT-OFF: # %bb.0: ++; LA32-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 ++; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 ++; LA32-CONTRACT-OFF-NEXT: ret ++; ++; LA64-CONTRACT-FAST-LABEL: fnmsub_s_nsz: ++; LA64-CONTRACT-FAST: # %bb.0: ++; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-FAST-NEXT: ret ++; ++; LA64-CONTRACT-ON-LABEL: fnmsub_s_nsz: ++; LA64-CONTRACT-ON: # %bb.0: ++; LA64-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 ++; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 ++; LA64-CONTRACT-ON-NEXT: ret ++; ++; LA64-CONTRACT-OFF-LABEL: fnmsub_s_nsz: ++; LA64-CONTRACT-OFF: # %bb.0: ++; LA64-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 ++; LA64-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 ++; LA64-CONTRACT-OFF-NEXT: ret ++ %nega = fneg nsz float %a ++ %mul = fmul nsz float %nega, %b ++ %add = fadd nsz float %mul, %c ++ ret float %add ++} ++ ++;; Check that fnmsub.s is not emitted. 
++define float @not_fnmsub_s(float %a, float %b, float %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: not_fnmsub_s: ++; LA32-CONTRACT-FAST: # %bb.0: ++; LA32-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 ++; LA32-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-FAST-NEXT: ret ++; ++; LA32-CONTRACT-ON-LABEL: not_fnmsub_s: ++; LA32-CONTRACT-ON: # %bb.0: ++; LA32-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 ++; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 ++; LA32-CONTRACT-ON-NEXT: ret ++; ++; LA32-CONTRACT-OFF-LABEL: not_fnmsub_s: ++; LA32-CONTRACT-OFF: # %bb.0: ++; LA32-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 ++; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 ++; LA32-CONTRACT-OFF-NEXT: ret ++; ++; LA64-CONTRACT-FAST-LABEL: not_fnmsub_s: ++; LA64-CONTRACT-FAST: # %bb.0: ++; LA64-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 ++; LA64-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-FAST-NEXT: ret ++; ++; LA64-CONTRACT-ON-LABEL: not_fnmsub_s: ++; LA64-CONTRACT-ON: # %bb.0: ++; LA64-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 ++; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 ++; LA64-CONTRACT-ON-NEXT: ret ++; ++; LA64-CONTRACT-OFF-LABEL: not_fnmsub_s: ++; LA64-CONTRACT-OFF: # %bb.0: ++; LA64-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 + ; LA64-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 + ; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg float %a +@@ -483,6 +571,86 @@ define float @contract_fnmsub_s(float %a, float %b, float %c) nounwind { + ; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_s: + ; LA64-CONTRACT-OFF: # %bb.0: + ; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-OFF-NEXT: ret ++ %negc = fneg contract float %c ++ %mul = fmul contract float %a, %b ++ %add = fadd contract float %mul, %negc ++ %neg = fneg contract float %add ++ ret float %neg ++} ++ ++define float @contract_fnmsub_s_nsz(float %a, float %b, float %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: contract_fnmsub_s_nsz: ++; LA32-CONTRACT-FAST: # %bb.0: ++; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-FAST-NEXT: ret ++; ++; LA32-CONTRACT-ON-LABEL: contract_fnmsub_s_nsz: ++; LA32-CONTRACT-ON: # %bb.0: ++; LA32-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-ON-NEXT: ret ++; ++; LA32-CONTRACT-OFF-LABEL: contract_fnmsub_s_nsz: ++; LA32-CONTRACT-OFF: # %bb.0: ++; LA32-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-OFF-NEXT: ret ++; ++; LA64-CONTRACT-FAST-LABEL: contract_fnmsub_s_nsz: ++; LA64-CONTRACT-FAST: # %bb.0: ++; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-FAST-NEXT: ret ++; ++; LA64-CONTRACT-ON-LABEL: contract_fnmsub_s_nsz: ++; LA64-CONTRACT-ON: # %bb.0: ++; LA64-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-ON-NEXT: ret ++; ++; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_s_nsz: ++; LA64-CONTRACT-OFF: # %bb.0: ++; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-OFF-NEXT: ret ++ %nega = fneg contract nsz float %a ++ %mul = fmul contract nsz float %nega, %b ++ %add = fadd contract nsz float %mul, %c ++ ret float %add ++} ++ ++;; Check that fnmsub.s is not emitted. 
++define float @not_contract_fnmsub_s(float %a, float %b, float %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: not_contract_fnmsub_s: ++; LA32-CONTRACT-FAST: # %bb.0: ++; LA32-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 ++; LA32-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-FAST-NEXT: ret ++; ++; LA32-CONTRACT-ON-LABEL: not_contract_fnmsub_s: ++; LA32-CONTRACT-ON: # %bb.0: ++; LA32-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 ++; LA32-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-ON-NEXT: ret ++; ++; LA32-CONTRACT-OFF-LABEL: not_contract_fnmsub_s: ++; LA32-CONTRACT-OFF: # %bb.0: ++; LA32-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 ++; LA32-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-OFF-NEXT: ret ++; ++; LA64-CONTRACT-FAST-LABEL: not_contract_fnmsub_s: ++; LA64-CONTRACT-FAST: # %bb.0: ++; LA64-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 ++; LA64-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-FAST-NEXT: ret ++; ++; LA64-CONTRACT-ON-LABEL: not_contract_fnmsub_s: ++; LA64-CONTRACT-ON: # %bb.0: ++; LA64-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 ++; LA64-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-ON-NEXT: ret ++; ++; LA64-CONTRACT-OFF-LABEL: not_contract_fnmsub_s: ++; LA64-CONTRACT-OFF: # %bb.0: ++; LA64-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 ++; LA64-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg contract float %a + %mul = fmul contract float %nega, %b +@@ -592,8 +760,8 @@ define float @fnmadd_s_intrinsics(float %a, float %b, float %c) nounwind { + ; LA64-CONTRACT-OFF-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-OFF-NEXT: ret + %fma = call float @llvm.fma.f64(float %a, float %b, float %c) +- %neg = fneg float %fma +- ret float %neg ++ %negfma = fneg float %fma ++ ret float %negfma + } + + define float @fnmadd_s_nsz_intrinsics(float %a, float %b, float %c) nounwind { +@@ -704,44 +872,87 @@ define float @fnmsub_s_intrinsics(float %a, float %b, float %c) nounwind { + ; LA64-CONTRACT-OFF-LABEL: fnmsub_s_intrinsics: + ; LA64-CONTRACT-OFF: # %bb.0: + ; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-OFF-NEXT: ret ++ %negc = fneg float %c ++ %fma = call float @llvm.fma.f64(float %a, float %b, float %negc) ++ %negfma = fneg float %fma ++ ret float %negfma ++} ++ ++define float @fnmsub_s_nsz_intrinsics(float %a, float %b, float %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: fnmsub_s_nsz_intrinsics: ++; LA32-CONTRACT-FAST: # %bb.0: ++; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-FAST-NEXT: ret ++; ++; LA32-CONTRACT-ON-LABEL: fnmsub_s_nsz_intrinsics: ++; LA32-CONTRACT-ON: # %bb.0: ++; LA32-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-ON-NEXT: ret ++; ++; LA32-CONTRACT-OFF-LABEL: fnmsub_s_nsz_intrinsics: ++; LA32-CONTRACT-OFF: # %bb.0: ++; LA32-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-OFF-NEXT: ret ++; ++; LA64-CONTRACT-FAST-LABEL: fnmsub_s_nsz_intrinsics: ++; LA64-CONTRACT-FAST: # %bb.0: ++; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-FAST-NEXT: ret ++; ++; LA64-CONTRACT-ON-LABEL: fnmsub_s_nsz_intrinsics: ++; LA64-CONTRACT-ON: # %bb.0: ++; LA64-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-ON-NEXT: ret ++; ++; LA64-CONTRACT-OFF-LABEL: fnmsub_s_nsz_intrinsics: ++; LA64-CONTRACT-OFF: # %bb.0: ++; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg float %a +- %fma = 
call float @llvm.fma.f64(float %nega, float %b, float %c) ++ %fma = call nsz float @llvm.fma.f64(float %nega, float %b, float %c) + ret float %fma + } + +-define float @fnmsub_s_swap_intrinsics(float %a, float %b, float %c) nounwind { +-; LA32-CONTRACT-FAST-LABEL: fnmsub_s_swap_intrinsics: ++;; Check that fnmsub.s is not emitted. ++define float @not_fnmsub_s_intrinsics(float %a, float %b, float %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: not_fnmsub_s_intrinsics: + ; LA32-CONTRACT-FAST: # %bb.0: +-; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 ++; LA32-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 ++; LA32-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 + ; LA32-CONTRACT-FAST-NEXT: ret + ; +-; LA32-CONTRACT-ON-LABEL: fnmsub_s_swap_intrinsics: ++; LA32-CONTRACT-ON-LABEL: not_fnmsub_s_intrinsics: + ; LA32-CONTRACT-ON: # %bb.0: +-; LA32-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 ++; LA32-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 ++; LA32-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 + ; LA32-CONTRACT-ON-NEXT: ret + ; +-; LA32-CONTRACT-OFF-LABEL: fnmsub_s_swap_intrinsics: ++; LA32-CONTRACT-OFF-LABEL: not_fnmsub_s_intrinsics: + ; LA32-CONTRACT-OFF: # %bb.0: +-; LA32-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 ++; LA32-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 ++; LA32-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 + ; LA32-CONTRACT-OFF-NEXT: ret + ; +-; LA64-CONTRACT-FAST-LABEL: fnmsub_s_swap_intrinsics: ++; LA64-CONTRACT-FAST-LABEL: not_fnmsub_s_intrinsics: + ; LA64-CONTRACT-FAST: # %bb.0: +-; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 ++; LA64-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 ++; LA64-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-FAST-NEXT: ret + ; +-; LA64-CONTRACT-ON-LABEL: fnmsub_s_swap_intrinsics: ++; LA64-CONTRACT-ON-LABEL: not_fnmsub_s_intrinsics: + ; LA64-CONTRACT-ON: # %bb.0: +-; LA64-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 ++; LA64-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 ++; LA64-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-ON-NEXT: ret + ; +-; LA64-CONTRACT-OFF-LABEL: fnmsub_s_swap_intrinsics: ++; LA64-CONTRACT-OFF-LABEL: not_fnmsub_s_intrinsics: + ; LA64-CONTRACT-OFF: # %bb.0: +-; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 ++; LA64-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 ++; LA64-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-OFF-NEXT: ret +- %negb = fneg float %b +- %fma = call float @llvm.fma.f64(float %a, float %negb, float %c) ++ %nega = fneg float %a ++ %fma = call float @llvm.fma.f64(float %nega, float %b, float %c) + ret float %fma + } + +@@ -882,6 +1093,8 @@ define float @fnmsub_s_contract(float %a, float %b, float %c) nounwind { + ; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-OFF-NEXT: ret + %mul = fmul contract float %a, %b +- %sub = fsub contract float %c, %mul +- ret float %sub ++ %negc = fneg contract float %c ++ %add = fadd contract float %negc, %mul ++ %negadd = fneg contract float %add ++ ret float %negadd + } +-- +2.20.1 + diff --git a/0023-LoongArch-Remove-experimental-auto-vec-feature.-1000.patch b/0023-LoongArch-Remove-experimental-auto-vec-feature.-1000.patch new file mode 100644 index 0000000..577997c --- /dev/null +++ b/0023-LoongArch-Remove-experimental-auto-vec-feature.-1000.patch @@ -0,0 +1,58 @@ +From b9c73c082e273e6c603915d57931f380851d5536 Mon Sep 17 00:00:00 2001 +From: Zhaoxin Yang +Date: Tue, 23 Jul 2024 15:19:00 +0800 +Subject: [PATCH 23/23] [LoongArch] Remove experimental `auto-vec` feature. 
+ (#100070) + +Currently, automatic vectorization will be enabled with `-mlsx/-mlasx` +enabled. + +(cherry picked from commit 89d1eb67342d75d1de8d210157fdeaeb6a4724b6) +--- + llvm/lib/Target/LoongArch/LoongArch.td | 4 ---- + llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp | 2 -- + llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll | 2 +- + 3 files changed, 1 insertion(+), 7 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td +index b5cd5bb0f8a4..5f85cace71af 100644 +--- a/llvm/lib/Target/LoongArch/LoongArch.td ++++ b/llvm/lib/Target/LoongArch/LoongArch.td +@@ -105,10 +105,6 @@ def FeatureUAL + def FeatureRelax + : SubtargetFeature<"relax", "HasLinkerRelax", "true", + "Enable Linker relaxation">; +-// Experimental auto vectorization +-def FeatureAutoVec +- : SubtargetFeature<"auto-vec", "HasExpAutoVec", "true", +- "Experimental auto vectorization">; + + // Floating point approximation operation + def FeatureFrecipe +diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp +index d47dded9ea6e..7961bb141e64 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp +@@ -26,8 +26,6 @@ TypeSize LoongArchTTIImpl::getRegisterBitWidth( + case TargetTransformInfo::RGK_Scalar: + return TypeSize::getFixed(ST->is64Bit() ? 64 : 32); + case TargetTransformInfo::RGK_FixedWidthVector: +- if (!ST->hasExpAutoVec()) +- return DefSize; + if (ST->hasExtLASX()) + return TypeSize::getFixed(256); + if (ST->hasExtLSX()) +diff --git a/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll b/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll +index a8ac2411dd82..6ab300859f9d 100644 +--- a/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll ++++ b/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll +@@ -1,5 +1,5 @@ + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +-; RUN: opt < %s -passes=loop-vectorize -mtriple loongarch64-linux-gnu -mattr=+lasx,+auto-vec -S | FileCheck %s ++; RUN: opt < %s -passes=loop-vectorize -mtriple loongarch64-linux-gnu -mattr=+lasx -S | FileCheck %s + + ;; This is a collection of tests whose only purpose is to show changes in the + ;; default configuration. 
Please keep these tests minimal - if you're testing +-- +2.20.1 + diff --git a/0024-LoongArch-Add-some-binary-IR-instructions-testcases-.patch b/0024-LoongArch-Add-some-binary-IR-instructions-testcases-.patch new file mode 100644 index 0000000..9dd1c0a --- /dev/null +++ b/0024-LoongArch-Add-some-binary-IR-instructions-testcases-.patch @@ -0,0 +1,1522 @@ +From 14502c64687d2a8524db46d0c952a54ccea17682 Mon Sep 17 00:00:00 2001 +From: leecheechen +Date: Fri, 1 Dec 2023 13:14:11 +0800 +Subject: [PATCH 24/42] [LoongArch] Add some binary IR instructions testcases + for LASX (#74031) + +The IR instructions include: +- Binary Operations: add fadd sub fsub mul fmul udiv sdiv fdiv +- Bitwise Binary Operations: shl lshr ashr + +(cherry picked from commit dbbc7c31c8e55d72dc243b244e386a25132e7215) + +--- + .../LoongArch/lasx/ir-instruction/add.ll | 122 +++++++++ + .../LoongArch/lasx/ir-instruction/ashr.ll | 178 +++++++++++++ + .../LoongArch/lasx/ir-instruction/fadd.ll | 34 +++ + .../LoongArch/lasx/ir-instruction/fdiv.ll | 34 +++ + .../LoongArch/lasx/ir-instruction/fmul.ll | 34 +++ + .../LoongArch/lasx/ir-instruction/fsub.ll | 34 +++ + .../LoongArch/lasx/ir-instruction/lshr.ll | 178 +++++++++++++ + .../LoongArch/lasx/ir-instruction/mul.ll | 238 ++++++++++++++++++ + .../LoongArch/lasx/ir-instruction/sdiv.ll | 134 ++++++++++ + .../LoongArch/lasx/ir-instruction/shl.ll | 178 +++++++++++++ + .../LoongArch/lasx/ir-instruction/sub.ll | 122 +++++++++ + .../LoongArch/lasx/ir-instruction/udiv.ll | 122 +++++++++ + 12 files changed, 1408 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll + +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll +new file mode 100644 +index 000000000000..8e4d0dc6f1c3 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll +@@ -0,0 +1,122 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @add_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: add_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvadd.b $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %v2 = add <32 x i8> %v0, %v1 ++ store <32 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @add_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: add_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, 
$a1, 0 ++; CHECK-NEXT: xvadd.h $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %v2 = add <16 x i16> %v0, %v1 ++ store <16 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @add_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: add_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvadd.w $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %v2 = add <8 x i32> %v0, %v1 ++ store <8 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @add_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: add_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvadd.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %v2 = add <4 x i64> %v0, %v1 ++ store <4 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @add_v32i8_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: add_v32i8_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvaddi.bu $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = add <32 x i8> %v0, ++ store <32 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @add_v16i16_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: add_v16i16_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvaddi.hu $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = add <16 x i16> %v0, ++ store <16 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @add_v8i32_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: add_v8i32_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvaddi.wu $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = add <8 x i32> %v0, ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @add_v4i64_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: add_v4i64_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvaddi.du $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = add <4 x i64> %v0, ++ store <4 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll +new file mode 100644 +index 000000000000..fcbf0f1400fe +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll +@@ -0,0 +1,178 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @ashr_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: ashr_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsra.b $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %v2 = ashr <32 x i8> %v0, %v1 ++ store <32 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void 
@ashr_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: ashr_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsra.h $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %v2 = ashr <16 x i16> %v0, %v1 ++ store <16 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @ashr_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: ashr_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsra.w $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %v2 = ashr <8 x i32> %v0, %v1 ++ store <8 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @ashr_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: ashr_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsra.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %v2 = ashr <4 x i64> %v0, %v1 ++ store <4 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @ashr_v32i8_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: ashr_v32i8_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrai.b $xr0, $xr0, 1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = ashr <32 x i8> %v0, ++ store <32 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @ashr_v32i8_7(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: ashr_v32i8_7: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrai.b $xr0, $xr0, 7 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = ashr <32 x i8> %v0, ++ store <32 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @ashr_v16i16_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: ashr_v16i16_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrai.h $xr0, $xr0, 1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = ashr <16 x i16> %v0, ++ store <16 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @ashr_v16i16_15(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: ashr_v16i16_15: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrai.h $xr0, $xr0, 15 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = ashr <16 x i16> %v0, ++ store <16 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @ashr_v8i32_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: ashr_v8i32_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrai.w $xr0, $xr0, 1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = ashr <8 x i32> %v0, ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @ashr_v8i32_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: ashr_v8i32_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrai.w $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = ashr <8 x i32> %v0, ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ 
++define void @ashr_v4i64_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: ashr_v4i64_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrai.d $xr0, $xr0, 1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = ashr <4 x i64> %v0, ++ store <4 x i64> %v1, ptr %res ++ ret void ++} ++ ++define void @ashr_v4i64_63(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: ashr_v4i64_63: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrai.d $xr0, $xr0, 63 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = ashr <4 x i64> %v0, ++ store <4 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll +new file mode 100644 +index 000000000000..365bb305fc5a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll +@@ -0,0 +1,34 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @fadd_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: fadd_v8f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfadd.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = fadd <8 x float> %v0, %v1 ++ store <8 x float> %v2, ptr %res ++ ret void ++} ++ ++define void @fadd_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: fadd_v4f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfadd.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = fadd <4 x double> %v0, %v1 ++ store <4 x double> %v2, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll +new file mode 100644 +index 000000000000..284121a79a49 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll +@@ -0,0 +1,34 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @fdiv_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: fdiv_v8f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfdiv.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = fdiv <8 x float> %v0, %v1 ++ store <8 x float> %v2, ptr %res ++ ret void ++} ++ ++define void @fdiv_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: fdiv_v4f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfdiv.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = fdiv <4 x double> %v0, %v1 ++ store <4 x double> %v2, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll 
b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll +new file mode 100644 +index 000000000000..a48dca8d2847 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll +@@ -0,0 +1,34 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @fmul_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: fmul_v8f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfmul.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = fmul <8 x float> %v0, %v1 ++ store <8 x float> %v2, ptr %res ++ ret void ++} ++ ++define void @fmul_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: fmul_v4f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfmul.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = fmul <4 x double> %v0, %v1 ++ store <4 x double> %v2, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll +new file mode 100644 +index 000000000000..6164aa5a55c7 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll +@@ -0,0 +1,34 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @fsub_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: fsub_v8f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfsub.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = fsub <8 x float> %v0, %v1 ++ store <8 x float> %v2, ptr %res ++ ret void ++} ++ ++define void @fsub_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: fsub_v4f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfsub.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = fsub <4 x double> %v0, %v1 ++ store <4 x double> %v2, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll +new file mode 100644 +index 000000000000..24be69d8032a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll +@@ -0,0 +1,178 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @lshr_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: lshr_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsrl.b $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %v2 = lshr <32 x i8> %v0, %v1 ++ store <32 x i8> %v2, ptr %res ++ ret void 
++} ++ ++define void @lshr_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: lshr_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsrl.h $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %v2 = lshr <16 x i16> %v0, %v1 ++ store <16 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @lshr_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: lshr_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsrl.w $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %v2 = lshr <8 x i32> %v0, %v1 ++ store <8 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @lshr_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: lshr_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsrl.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %v2 = lshr <4 x i64> %v0, %v1 ++ store <4 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @lshr_v32i8_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: lshr_v32i8_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrli.b $xr0, $xr0, 1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = lshr <32 x i8> %v0, ++ store <32 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @lshr_v32i8_7(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: lshr_v32i8_7: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrli.b $xr0, $xr0, 7 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = lshr <32 x i8> %v0, ++ store <32 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @lshr_v16i16_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: lshr_v16i16_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrli.h $xr0, $xr0, 1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = lshr <16 x i16> %v0, ++ store <16 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @lshr_v16i16_15(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: lshr_v16i16_15: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrli.h $xr0, $xr0, 15 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = lshr <16 x i16> %v0, ++ store <16 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @lshr_v8i32_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: lshr_v8i32_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrli.w $xr0, $xr0, 1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = lshr <8 x i32> %v0, ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @lshr_v8i32_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: lshr_v8i32_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrli.w $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = lshr <8 x i32> %v0, ++ store <8 x i32> %v1, ptr 
%res ++ ret void ++} ++ ++define void @lshr_v4i64_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: lshr_v4i64_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrli.d $xr0, $xr0, 1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = lshr <4 x i64> %v0, ++ store <4 x i64> %v1, ptr %res ++ ret void ++} ++ ++define void @lshr_v4i64_63(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: lshr_v4i64_63: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrli.d $xr0, $xr0, 63 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = lshr <4 x i64> %v0, ++ store <4 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll +new file mode 100644 +index 000000000000..dcb893caa255 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll +@@ -0,0 +1,238 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @mul_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mul_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvmul.b $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %v2 = mul <32 x i8> %v0, %v1 ++ store <32 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @mul_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mul_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvmul.h $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %v2 = mul <16 x i16> %v0, %v1 ++ store <16 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @mul_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mul_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvmul.w $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %v2 = mul <8 x i32> %v0, %v1 ++ store <8 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @mul_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mul_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvmul.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %v2 = mul <4 x i64> %v0, %v1 ++ store <4 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @mul_square_v32i8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_square_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvmul.b $xr0, $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = mul <32 x i8> %v0, %v0 ++ store <32 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_square_v16i16(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_square_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: 
xvmul.h $xr0, $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = mul <16 x i16> %v0, %v0 ++ store <16 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_square_v8i32(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_square_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvmul.w $xr0, $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = mul <8 x i32> %v0, %v0 ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_square_v4i64(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_square_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvmul.d $xr0, $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = mul <4 x i64> %v0, %v0 ++ store <4 x i64> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_v32i8_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_v32i8_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslli.b $xr0, $xr0, 3 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = mul <32 x i8> %v0, ++ store <32 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_v16i16_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_v16i16_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslli.h $xr0, $xr0, 3 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = mul <16 x i16> %v0, ++ store <16 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_v8i32_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_v8i32_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslli.w $xr0, $xr0, 3 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = mul <8 x i32> %v0, ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_v4i64_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_v4i64_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslli.d $xr0, $xr0, 3 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = mul <4 x i64> %v0, ++ store <4 x i64> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_v32i8_17(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_v32i8_17: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvrepli.b $xr1, 17 ++; CHECK-NEXT: xvmul.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = mul <32 x i8> %v0, ++ store <32 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_v16i16_17(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_v16i16_17: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvrepli.h $xr1, 17 ++; CHECK-NEXT: xvmul.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = mul <16 x i16> %v0, ++ store <16 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_v8i32_17(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_v8i32_17: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvrepli.w $xr1, 17 ++; CHECK-NEXT: xvmul.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load 
<8 x i32>, ptr %a0 ++ %v1 = mul <8 x i32> %v0, ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_v4i64_17(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_v4i64_17: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvrepli.d $xr1, 17 ++; CHECK-NEXT: xvmul.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = mul <4 x i64> %v0, ++ store <4 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll +new file mode 100644 +index 000000000000..e3635a5f14a2 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll +@@ -0,0 +1,134 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @sdiv_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: sdiv_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvdiv.b $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %v2 = sdiv <32 x i8> %v0, %v1 ++ store <32 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @sdiv_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: sdiv_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvdiv.h $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %v2 = sdiv <16 x i16> %v0, %v1 ++ store <16 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @sdiv_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: sdiv_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvdiv.w $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %v2 = sdiv <8 x i32> %v0, %v1 ++ store <8 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @sdiv_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: sdiv_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvdiv.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %v2 = sdiv <4 x i64> %v0, %v1 ++ store <4 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @sdiv_v32i8_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sdiv_v32i8_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrai.b $xr1, $xr0, 7 ++; CHECK-NEXT: xvsrli.b $xr1, $xr1, 5 ++; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvsrai.b $xr0, $xr0, 3 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = sdiv <32 x i8> %v0, ++ store <32 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @sdiv_v16i16_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sdiv_v16i16_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrai.h $xr1, $xr0, 15 ++; CHECK-NEXT: xvsrli.h $xr1, $xr1, 13 ++; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvsrai.h $xr0, $xr0, 3 ++; 
CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = sdiv <16 x i16> %v0, ++ store <16 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @sdiv_v8i32_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sdiv_v8i32_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrai.w $xr1, $xr0, 31 ++; CHECK-NEXT: xvsrli.w $xr1, $xr1, 29 ++; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvsrai.w $xr0, $xr0, 3 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = sdiv <8 x i32> %v0, ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @sdiv_v4i64_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sdiv_v4i64_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrai.d $xr1, $xr0, 63 ++; CHECK-NEXT: xvsrli.d $xr1, $xr1, 61 ++; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvsrai.d $xr0, $xr0, 3 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = sdiv <4 x i64> %v0, ++ store <4 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll +new file mode 100644 +index 000000000000..8a02c7e3ac97 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll +@@ -0,0 +1,178 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @shl_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: shl_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsll.b $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %v2 = shl <32 x i8> %v0, %v1 ++ store <32 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @shl_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: shl_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsll.h $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %v2 = shl <16 x i16> %v0, %v1 ++ store <16 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @shl_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: shl_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsll.w $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %v2 = shl <8 x i32> %v0, %v1 ++ store <8 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @shl_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: shl_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsll.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %v2 = shl <4 x i64> %v0, %v1 ++ store <4 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @shl_v32i8_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: shl_v32i8_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: 
xvslli.b $xr0, $xr0, 1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = shl <32 x i8> %v0, ++ store <32 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @shl_v32i8_7(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: shl_v32i8_7: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslli.b $xr0, $xr0, 7 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = shl <32 x i8> %v0, ++ store <32 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @shl_v16i16_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: shl_v16i16_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslli.h $xr0, $xr0, 1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = shl <16 x i16> %v0, ++ store <16 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @shl_v16i16_15(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: shl_v16i16_15: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslli.h $xr0, $xr0, 15 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = shl <16 x i16> %v0, ++ store <16 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @shl_v8i32_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: shl_v8i32_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslli.w $xr0, $xr0, 1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = shl <8 x i32> %v0, ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @shl_v8i32_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: shl_v8i32_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslli.w $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = shl <8 x i32> %v0, ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @shl_v4i64_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: shl_v4i64_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslli.d $xr0, $xr0, 1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = shl <4 x i64> %v0, ++ store <4 x i64> %v1, ptr %res ++ ret void ++} ++ ++define void @shl_v4i64_63(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: shl_v4i64_63: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslli.d $xr0, $xr0, 63 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = shl <4 x i64> %v0, ++ store <4 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll +new file mode 100644 +index 000000000000..bcfff1651477 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll +@@ -0,0 +1,122 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @sub_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: sub_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsub.b $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 
= load <32 x i8>, ptr %a1 ++ %v2 = sub <32 x i8> %v0, %v1 ++ store <32 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @sub_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: sub_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsub.h $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %v2 = sub <16 x i16> %v0, %v1 ++ store <16 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @sub_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: sub_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsub.w $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %v2 = sub <8 x i32> %v0, %v1 ++ store <8 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @sub_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: sub_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsub.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %v2 = sub <4 x i64> %v0, %v1 ++ store <4 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @sub_v32i8_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sub_v32i8_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsubi.bu $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = sub <32 x i8> %v0, ++ store <32 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @sub_v16i16_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sub_v16i16_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsubi.hu $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = sub <16 x i16> %v0, ++ store <16 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @sub_v8i32_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sub_v8i32_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsubi.wu $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = sub <8 x i32> %v0, ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @sub_v4i64_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sub_v4i64_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsubi.du $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = sub <4 x i64> %v0, ++ store <4 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll +new file mode 100644 +index 000000000000..e78084c7186d +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll +@@ -0,0 +1,122 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @udiv_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: udiv_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: 
xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvdiv.bu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %v2 = udiv <32 x i8> %v0, %v1 ++ store <32 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @udiv_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: udiv_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvdiv.hu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %v2 = udiv <16 x i16> %v0, %v1 ++ store <16 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @udiv_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: udiv_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvdiv.wu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %v2 = udiv <8 x i32> %v0, %v1 ++ store <8 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @udiv_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: udiv_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvdiv.du $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %v2 = udiv <4 x i64> %v0, %v1 ++ store <4 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @udiv_v32i8_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: udiv_v32i8_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrli.b $xr0, $xr0, 3 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = udiv <32 x i8> %v0, ++ store <32 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @udiv_v16i16_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: udiv_v16i16_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrli.h $xr0, $xr0, 3 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = udiv <16 x i16> %v0, ++ store <16 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @udiv_v8i32_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: udiv_v8i32_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrli.w $xr0, $xr0, 3 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = udiv <8 x i32> %v0, ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @udiv_v4i64_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: udiv_v4i64_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrli.d $xr0, $xr0, 3 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = udiv <4 x i64> %v0, ++ store <4 x i64> %v1, ptr %res ++ ret void ++} +-- +2.20.1 + diff --git a/0024-LoongArch-Fix-the-procossor-series-mask.patch b/0024-LoongArch-Fix-the-procossor-series-mask.patch new file mode 100644 index 0000000..a022333 --- /dev/null +++ b/0024-LoongArch-Fix-the-procossor-series-mask.patch @@ -0,0 +1,30 @@ +From 43d78731b1d4c7419d82b6fab2ae479652cd2b58 Mon Sep 17 00:00:00 2001 +From: Weining Lu +Date: Wed, 3 Jan 2024 13:59:12 +0800 +Subject: [PATCH 24/27] [LoongArch] Fix the procossor series mask + 
+Refer PRID_SERIES_MASK definition in linux kernel: +arch/loongarch/include/asm/cpu.h. + +(cherry picked from commit 7e186d366d6c7def0543acc255931f617e76dff0) +--- + llvm/lib/TargetParser/Host.cpp | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp +index 81309280a44b..d11dc605e188 100644 +--- a/llvm/lib/TargetParser/Host.cpp ++++ b/llvm/lib/TargetParser/Host.cpp +@@ -1462,7 +1462,8 @@ StringRef sys::getHostCPUName() { + // Use processor id to detect cpu name. + uint32_t processor_id; + __asm__("cpucfg %[prid], $zero\n\t" : [prid] "=r"(processor_id)); +- switch (processor_id & 0xff00) { ++ // Refer PRID_SERIES_MASK in linux kernel: arch/loongarch/include/asm/cpu.h. ++ switch (processor_id & 0xf000) { + case 0xc000: // Loongson 64bit, 4-issue + return "la464"; + // TODO: Others. +-- +2.20.1 + diff --git a/0025-LoongArch-Make-sure-that-the-LoongArchISD-BSTRINS-no.patch b/0025-LoongArch-Make-sure-that-the-LoongArchISD-BSTRINS-no.patch new file mode 100644 index 0000000..9f6b6c7 --- /dev/null +++ b/0025-LoongArch-Make-sure-that-the-LoongArchISD-BSTRINS-no.patch @@ -0,0 +1,59 @@ +From 758014af0c9b2a54a57a8dc9dfa206be6be4161e Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Mon, 11 Mar 2024 08:59:17 +0800 +Subject: [PATCH 25/27] [LoongArch] Make sure that the LoongArchISD::BSTRINS + node uses the correct `MSB` value (#84454) + +The `MSB` must not be greater than `GRLen`. Without this patch, newly +added test cases will crash with LoongArch32, resulting in a 'cannot +select' error. + +(cherry picked from commit edd4c6c6dca4c556de22b2ab73d5bfc02d28e59b) +(cherry picked from commit d77c5c3830d925b3795e2f1535a6568399fe6626) +--- + llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp | 4 +++- + llvm/test/CodeGen/LoongArch/bstrins_w.ll | 13 +++++++++++++ + 2 files changed, 16 insertions(+), 1 deletion(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index ed106cb766bc..5affaf37ad5a 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -2310,7 +2310,9 @@ Retry: + return DAG.getNode( + LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), + DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy), +- DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT), ++ DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1) ++ : (MaskIdx0 + MaskLen0 - 1), ++ DL, GRLenVT), + DAG.getConstant(MaskIdx0, DL, GRLenVT)); + } + +diff --git a/llvm/test/CodeGen/LoongArch/bstrins_w.ll b/llvm/test/CodeGen/LoongArch/bstrins_w.ll +index dfbe000841cd..e008caacad2a 100644 +--- a/llvm/test/CodeGen/LoongArch/bstrins_w.ll ++++ b/llvm/test/CodeGen/LoongArch/bstrins_w.ll +@@ -145,6 +145,19 @@ define i32 @pat5(i32 %a) nounwind { + ret i32 %or + } + ++;; The high bits of `const` are zero. ++define i32 @pat5_high_zeros(i32 %a) nounwind { ++; CHECK-LABEL: pat5_high_zeros: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $a1, 1 ++; CHECK-NEXT: ori $a1, $a1, 564 ++; CHECK-NEXT: bstrins.w $a0, $a1, 31, 16 ++; CHECK-NEXT: ret ++ %and = and i32 %a, 65535 ; 0x0000ffff ++ %or = or i32 %and, 305397760 ; 0x12340000 ++ ret i32 %or ++} ++ + ;; Pattern 6: a = b | ((c & mask) << shamt) + ;; In this testcase b is 0x10000002, but in fact we do not require b being a + ;; constant. 
As long as all positions in b to be overwritten by the incoming +-- +2.20.1 + diff --git a/0025-LoongArch-Override-TargetLowering-isShuffleMaskLegal.patch b/0025-LoongArch-Override-TargetLowering-isShuffleMaskLegal.patch new file mode 100644 index 0000000..b72d821 --- /dev/null +++ b/0025-LoongArch-Override-TargetLowering-isShuffleMaskLegal.patch @@ -0,0 +1,115 @@ +From b18f81359a267958fef5c9e6f203fca6e891cce6 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Sat, 2 Dec 2023 14:25:17 +0800 +Subject: [PATCH 25/42] [LoongArch] Override TargetLowering::isShuffleMaskLegal + + By default, `isShuffleMaskLegal` always returns true, which can result + in the expansion of `BUILD_VECTOR` into a `VECTOR_SHUFFLE` node in + certain situations. Subsequently, the `VECTOR_SHUFFLE` node is expanded + again into a `BUILD_VECTOR`, leading to an infinite loop. + To address this, we always return false, allowing the expansion of + `BUILD_VECTOR` through the stack. + +(cherry picked from commit 66a3e4fafb6eae19764f8a192ca3a116c0554211) + +--- + .../LoongArch/LoongArchISelLowering.cpp | 10 +++++++++ + .../Target/LoongArch/LoongArchISelLowering.h | 5 +++++ + .../CodeGen/LoongArch/lsx/build-vector.ll | 22 +++++++++++++++++++ + 3 files changed, 37 insertions(+) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index 1b60bfc3bddb..e45f21265d7b 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -239,6 +239,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + } + for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { ++ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); + setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT, + Legal); +@@ -268,6 +269,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + } + for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) { ++ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); + setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT, + Legal); +@@ -370,10 +372,18 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, + return lowerINSERT_VECTOR_ELT(Op, DAG); + case ISD::BUILD_VECTOR: + return lowerBUILD_VECTOR(Op, DAG); ++ case ISD::VECTOR_SHUFFLE: ++ return lowerVECTOR_SHUFFLE(Op, DAG); + } + return SDValue(); + } + ++SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, ++ SelectionDAG &DAG) const { ++ // TODO: custom shuffle. ++ return SDValue(); ++} ++ + static bool isConstantOrUndef(const SDValue Op) { + if (Op->isUndef()) + return true; +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +index 111376306374..2c35f9e5d378 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +@@ -230,6 +230,10 @@ public: + MachineMemOperand::Flags Flags = MachineMemOperand::MONone, + unsigned *Fast = nullptr) const override; + ++ bool isShuffleMaskLegal(ArrayRef Mask, EVT VT) const override { ++ return false; ++ } ++ + private: + /// Target-specific function used to lower LoongArch calling conventions. 
+ typedef bool LoongArchCCAssignFn(const DataLayout &DL, LoongArchABI::ABI ABI, +@@ -277,6 +281,7 @@ private: + SDValue lowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + + bool isFPImmLegal(const APFloat &Imm, EVT VT, + bool ForCodeSize) const override; +diff --git a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll +index 3a74db5e1acb..ed1f610a5fa6 100644 +--- a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll ++++ b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll +@@ -374,3 +374,25 @@ entry: + store <2 x double> %ins1, ptr %dst + ret void + } ++ ++;; BUILD_VECTOR through stack. ++;; If `isShuffleMaskLegal` returns true, it will lead to an infinite loop. ++define void @extract1_i32_zext_insert0_i64_undef(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: extract1_i32_zext_insert0_i64_undef: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1 ++; CHECK-NEXT: bstrpick.d $a0, $a0, 31, 0 ++; CHECK-NEXT: st.d $a0, $sp, 0 ++; CHECK-NEXT: vld $vr0, $sp, 0 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $sp, 16 ++; CHECK-NEXT: ret ++ %v = load volatile <4 x i32>, ptr %src ++ %e = extractelement <4 x i32> %v, i32 1 ++ %z = zext i32 %e to i64 ++ %r = insertelement <2 x i64> undef, i64 %z, i32 0 ++ store <2 x i64> %r, ptr %dst ++ ret void ++} +-- +2.20.1 + diff --git a/0026-Clang-LoongArch-Precommit-test-for-fix-wrong-return-.patch b/0026-Clang-LoongArch-Precommit-test-for-fix-wrong-return-.patch new file mode 100644 index 0000000..1fb665e --- /dev/null +++ b/0026-Clang-LoongArch-Precommit-test-for-fix-wrong-return-.patch @@ -0,0 +1,108 @@ +From 150f40b2bd7023643e14d734bd4f554a8107bdbb Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Tue, 5 Mar 2024 19:44:28 +0800 +Subject: [PATCH 26/27] [Clang][LoongArch] Precommit test for fix wrong return + value type of __iocsrrd_h. 
NFC + +(cherry picked from commit aeda1a6e800e0dd6c91c0332b4db95094ad5b301) +(cherry picked from commit a9ba36c7e7d7fa076f201843e3b826b6c6d7f5ef) +--- + clang/test/CodeGen/LoongArch/intrinsic-la32.c | 29 ++++++++++++++----- + clang/test/CodeGen/LoongArch/intrinsic-la64.c | 21 ++++++++++++-- + 2 files changed, 40 insertions(+), 10 deletions(-) + +diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la32.c b/clang/test/CodeGen/LoongArch/intrinsic-la32.c +index 93d54f511a9c..6a8d99880be3 100644 +--- a/clang/test/CodeGen/LoongArch/intrinsic-la32.c ++++ b/clang/test/CodeGen/LoongArch/intrinsic-la32.c +@@ -169,8 +169,8 @@ unsigned int cpucfg(unsigned int a) { + + // LA32-LABEL: @rdtime( + // LA32-NEXT: entry: +-// LA32-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimeh.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1:[0-9]+]], !srcloc !2 +-// LA32-NEXT: [[TMP1:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimel.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc !3 ++// LA32-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimeh.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1:[0-9]+]], !srcloc [[META2:![0-9]+]] ++// LA32-NEXT: [[TMP1:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimel.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc [[META3:![0-9]+]] + // LA32-NEXT: ret void + // + void rdtime() { +@@ -201,13 +201,28 @@ void loongarch_movgr2fcsr(int a) { + __builtin_loongarch_movgr2fcsr(1, a); + } + +-// CHECK-LABEL: @cacop_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.cacop.w(i32 1, i32 [[A:%.*]], i32 1024) +-// CHECK-NEXT: tail call void @llvm.loongarch.cacop.w(i32 1, i32 [[A]], i32 1024) +-// CHECK-NEXT: ret void ++// LA32-LABEL: @cacop_w( ++// LA32-NEXT: entry: ++// LA32-NEXT: tail call void @llvm.loongarch.cacop.w(i32 1, i32 [[A:%.*]], i32 1024) ++// LA32-NEXT: tail call void @llvm.loongarch.cacop.w(i32 1, i32 [[A]], i32 1024) ++// LA32-NEXT: ret void + // + void cacop_w(unsigned long int a) { + __cacop_w(1, a, 1024); + __builtin_loongarch_cacop_w(1, a, 1024); + } ++ ++// LA32-LABEL: @iocsrrd_h_result( ++// LA32-NEXT: entry: ++// LA32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A:%.*]]) ++// LA32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A]]) ++// LA32-NEXT: [[CONV2:%.*]] = and i32 [[TMP0]], 255 ++// LA32-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[CONV2]] ++// LA32-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD]] to i16 ++// LA32-NEXT: ret i16 [[CONV4]] ++// ++unsigned short iocsrrd_h_result(unsigned int a) { ++ unsigned short b = __iocsrrd_h(a); ++ unsigned short c = __builtin_loongarch_iocsrrd_h(a); ++ return b+c; ++} +diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la64.c b/clang/test/CodeGen/LoongArch/intrinsic-la64.c +index a740882eef54..48b6a7a3d227 100644 +--- a/clang/test/CodeGen/LoongArch/intrinsic-la64.c ++++ b/clang/test/CodeGen/LoongArch/intrinsic-la64.c +@@ -387,7 +387,7 @@ unsigned int cpucfg(unsigned int a) { + + // CHECK-LABEL: @rdtime_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call { i64, i64 } asm sideeffect "rdtime.d $0, $1\0A\09", "=&r,=&r"() #[[ATTR1:[0-9]+]], !srcloc !2 ++// CHECK-NEXT: [[TMP0:%.*]] = tail call { i64, i64 } asm sideeffect "rdtime.d $0, $1\0A\09", "=&r,=&r"() #[[ATTR1:[0-9]+]], !srcloc [[META2:![0-9]+]] + // CHECK-NEXT: ret void + // + void rdtime_d() { +@@ -396,8 +396,8 @@ void rdtime_d() { + + // CHECK-LABEL: @rdtime( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimeh.w $0, $1\0A\09", "=&r,=&r"() 
#[[ATTR1]], !srcloc !3 +-// CHECK-NEXT: [[TMP1:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimel.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc !4 ++// CHECK-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimeh.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc [[META3:![0-9]+]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimel.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc [[META4:![0-9]+]] + // CHECK-NEXT: ret void + // + void rdtime() { +@@ -427,3 +427,18 @@ void loongarch_movgr2fcsr(int a) { + __movgr2fcsr(1, a); + __builtin_loongarch_movgr2fcsr(1, a); + } ++ ++// CHECK-LABEL: @iocsrrd_h_result( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A]]) ++// CHECK-NEXT: [[CONV2:%.*]] = and i32 [[TMP0]], 255 ++// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[CONV2]] ++// CHECK-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD]] to i16 ++// CHECK-NEXT: ret i16 [[CONV4]] ++// ++unsigned short iocsrrd_h_result(unsigned int a) { ++ unsigned short b = __iocsrrd_h(a); ++ unsigned short c = __builtin_loongarch_iocsrrd_h(a); ++ return b+c; ++} +-- +2.20.1 + diff --git a/0026-Reland-LoongArch-Support-CTLZ-with-lsx-lasx.patch b/0026-Reland-LoongArch-Support-CTLZ-with-lsx-lasx.patch new file mode 100644 index 0000000..ff7b54f --- /dev/null +++ b/0026-Reland-LoongArch-Support-CTLZ-with-lsx-lasx.patch @@ -0,0 +1,389 @@ +From 4bbe405d5a8a789f23a4b430fe619c1cdfa3d631 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Sat, 2 Dec 2023 16:24:33 +0800 +Subject: [PATCH 26/42] Reland "[LoongArch] Support CTLZ with lsx/lasx" + +This patch simultaneously adds tests for `CTPOP`. + +This relands 07cec73dcd095035257eec1f213d273b10988130 with fix tests. 
+ +(cherry picked from commit a60a5421b60be1bce0272385fa16846ada5eed5e) + +--- + .../LoongArch/LoongArchISelLowering.cpp | 13 +- + .../LoongArch/LoongArchLASXInstrInfo.td | 11 +- + .../Target/LoongArch/LoongArchLSXInstrInfo.td | 11 +- + .../test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll | 115 ++++++++++++++++++ + llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll | 115 ++++++++++++++++++ + 5 files changed, 255 insertions(+), 10 deletions(-) + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index e45f21265d7b..358263b1a258 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -247,7 +247,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + VT, Legal); + setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal); + setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); +- setOperationAction(ISD::CTPOP, VT, Legal); ++ setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal); + } + for (MVT VT : {MVT::v4f32, MVT::v2f64}) { + setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); +@@ -277,7 +277,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + VT, Legal); + setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal); + setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); +- setOperationAction(ISD::CTPOP, VT, Legal); ++ setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal); + } + for (MVT VT : {MVT::v8f32, MVT::v4f64}) { + setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); +@@ -2800,6 +2800,15 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, + case Intrinsic::loongarch_lasx_xvsrai_d: + return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<6>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vclz_b: ++ case Intrinsic::loongarch_lsx_vclz_h: ++ case Intrinsic::loongarch_lsx_vclz_w: ++ case Intrinsic::loongarch_lsx_vclz_d: ++ case Intrinsic::loongarch_lasx_xvclz_b: ++ case Intrinsic::loongarch_lasx_xvclz_h: ++ case Intrinsic::loongarch_lasx_xvclz_w: ++ case Intrinsic::loongarch_lasx_xvclz_d: ++ return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1)); + case Intrinsic::loongarch_lsx_vpcnt_b: + case Intrinsic::loongarch_lsx_vpcnt_h: + case Intrinsic::loongarch_lsx_vpcnt_w: +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index a5652472481a..960ac627578c 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -1273,6 +1273,9 @@ defm : PatXrXr; + defm : PatShiftXrXr; + defm : PatShiftXrUimm; + ++// XVCLZ_{B/H/W/D} ++defm : PatXr; ++ + // XVPCNT_{B/H/W/D} + defm : PatXr; + +@@ -1590,26 +1593,26 @@ foreach Inst = ["XVMADDWEV_Q_D", "XVMADDWOD_Q_D", "XVMADDWEV_Q_DU", + // (LAInst vty:$xj)>; + foreach Inst = ["XVEXTH_H_B", "XVEXTH_HU_BU", + "XVMSKLTZ_B", "XVMSKGEZ_B", "XVMSKNZ_B", +- "XVCLO_B", "XVCLZ_B", "VEXT2XV_H_B", "VEXT2XV_HU_BU", ++ "XVCLO_B", "VEXT2XV_H_B", "VEXT2XV_HU_BU", + "VEXT2XV_W_B", "VEXT2XV_WU_BU", "VEXT2XV_D_B", + "VEXT2XV_DU_BU", "XVREPLVE0_B", "XVREPLVE0_Q"] in + def : Pat<(deriveLASXIntrinsic.ret (v32i8 LASX256:$xj)), + (!cast(Inst) LASX256:$xj)>; + foreach Inst = ["XVEXTH_W_H", "XVEXTH_WU_HU", "XVMSKLTZ_H", +- "XVCLO_H", 
"XVCLZ_H", "XVFCVTL_S_H", "XVFCVTH_S_H", ++ "XVCLO_H", "XVFCVTL_S_H", "XVFCVTH_S_H", + "VEXT2XV_W_H", "VEXT2XV_WU_HU", "VEXT2XV_D_H", + "VEXT2XV_DU_HU", "XVREPLVE0_H"] in + def : Pat<(deriveLASXIntrinsic.ret (v16i16 LASX256:$xj)), + (!cast(Inst) LASX256:$xj)>; + foreach Inst = ["XVEXTH_D_W", "XVEXTH_DU_WU", "XVMSKLTZ_W", +- "XVCLO_W", "XVCLZ_W", "XVFFINT_S_W", "XVFFINT_S_WU", ++ "XVCLO_W", "XVFFINT_S_W", "XVFFINT_S_WU", + "XVFFINTL_D_W", "XVFFINTH_D_W", + "VEXT2XV_D_W", "VEXT2XV_DU_WU", "XVREPLVE0_W"] in + def : Pat<(deriveLASXIntrinsic.ret (v8i32 LASX256:$xj)), + (!cast(Inst) LASX256:$xj)>; + foreach Inst = ["XVEXTH_Q_D", "XVEXTH_QU_DU", "XVMSKLTZ_D", + "XVEXTL_Q_D", "XVEXTL_QU_DU", +- "XVCLO_D", "XVCLZ_D", "XVFFINT_D_L", "XVFFINT_D_LU", ++ "XVCLO_D", "XVFFINT_D_L", "XVFFINT_D_LU", + "XVREPLVE0_D"] in + def : Pat<(deriveLASXIntrinsic.ret (v4i64 LASX256:$xj)), + (!cast(Inst) LASX256:$xj)>; +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index 5645ce51194a..3480ade9eebf 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -1350,6 +1350,9 @@ defm : PatVrVr; + defm : PatShiftVrVr; + defm : PatShiftVrUimm; + ++// VCLZ_{B/H/W/D} ++defm : PatVr; ++ + // VPCNT_{B/H/W/D} + defm : PatVr; + +@@ -1674,21 +1677,21 @@ foreach Inst = ["VMADDWEV_Q_D", "VMADDWOD_Q_D", "VMADDWEV_Q_DU", + // (LAInst vty:$vj)>; + foreach Inst = ["VEXTH_H_B", "VEXTH_HU_BU", + "VMSKLTZ_B", "VMSKGEZ_B", "VMSKNZ_B", +- "VCLO_B", "VCLZ_B"] in ++ "VCLO_B"] in + def : Pat<(deriveLSXIntrinsic.ret (v16i8 LSX128:$vj)), + (!cast(Inst) LSX128:$vj)>; + foreach Inst = ["VEXTH_W_H", "VEXTH_WU_HU", "VMSKLTZ_H", +- "VCLO_H", "VCLZ_H", "VFCVTL_S_H", "VFCVTH_S_H"] in ++ "VCLO_H", "VFCVTL_S_H", "VFCVTH_S_H"] in + def : Pat<(deriveLSXIntrinsic.ret (v8i16 LSX128:$vj)), + (!cast(Inst) LSX128:$vj)>; + foreach Inst = ["VEXTH_D_W", "VEXTH_DU_WU", "VMSKLTZ_W", +- "VCLO_W", "VCLZ_W", "VFFINT_S_W", "VFFINT_S_WU", ++ "VCLO_W", "VFFINT_S_W", "VFFINT_S_WU", + "VFFINTL_D_W", "VFFINTH_D_W"] in + def : Pat<(deriveLSXIntrinsic.ret (v4i32 LSX128:$vj)), + (!cast(Inst) LSX128:$vj)>; + foreach Inst = ["VEXTH_Q_D", "VEXTH_QU_DU", "VMSKLTZ_D", + "VEXTL_Q_D", "VEXTL_QU_DU", +- "VCLO_D", "VCLZ_D", "VFFINT_D_L", "VFFINT_D_LU"] in ++ "VCLO_D", "VFFINT_D_L", "VFFINT_D_LU"] in + def : Pat<(deriveLSXIntrinsic.ret (v2i64 LSX128:$vj)), + (!cast(Inst) LSX128:$vj)>; + +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll b/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll +new file mode 100644 +index 000000000000..7786e399c95f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll +@@ -0,0 +1,115 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @ctpop_v32i8(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: ctpop_v32i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvpcnt.b $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load <32 x i8>, ptr %src ++ %res = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %v) ++ store <32 x i8> %res, ptr %dst ++ ret void ++} ++ ++define void @ctpop_v16i16(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: ctpop_v16i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvpcnt.h $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load <16 x i16>, ptr %src ++ %res = call <16 x i16> 
@llvm.ctpop.v16i16(<16 x i16> %v) ++ store <16 x i16> %res, ptr %dst ++ ret void ++} ++ ++define void @ctpop_v8i32(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: ctpop_v8i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvpcnt.w $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load <8 x i32>, ptr %src ++ %res = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %v) ++ store <8 x i32> %res, ptr %dst ++ ret void ++} ++ ++define void @ctpop_v4i64(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: ctpop_v4i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvpcnt.d $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load <4 x i64>, ptr %src ++ %res = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %v) ++ store <4 x i64> %res, ptr %dst ++ ret void ++} ++ ++define void @ctlz_v32i8(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: ctlz_v32i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvclz.b $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load <32 x i8>, ptr %src ++ %res = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %v, i1 false) ++ store <32 x i8> %res, ptr %dst ++ ret void ++} ++ ++define void @ctlz_v16i16(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: ctlz_v16i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvclz.h $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load <16 x i16>, ptr %src ++ %res = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %v, i1 false) ++ store <16 x i16> %res, ptr %dst ++ ret void ++} ++ ++define void @ctlz_v8i32(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: ctlz_v8i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvclz.w $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load <8 x i32>, ptr %src ++ %res = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %v, i1 false) ++ store <8 x i32> %res, ptr %dst ++ ret void ++} ++ ++define void @ctlz_v4i64(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: ctlz_v4i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvclz.d $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load <4 x i64>, ptr %src ++ %res = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %v, i1 false) ++ store <4 x i64> %res, ptr %dst ++ ret void ++} ++ ++declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>) ++declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>) ++declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) ++declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>) ++declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>, i1) ++declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1) ++declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1) ++declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll b/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll +new file mode 100644 +index 000000000000..5df553fba7ef +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll +@@ -0,0 +1,115 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @ctpop_v16i8(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: ctpop_v16i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vpcnt.b $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load <16 x i8>, ptr %src ++ %res = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %v) ++ store <16 x i8> %res, ptr %dst ++ ret void ++} ++ 
++define void @ctpop_v8i16(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: ctpop_v8i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vpcnt.h $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load <8 x i16>, ptr %src ++ %res = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %v) ++ store <8 x i16> %res, ptr %dst ++ ret void ++} ++ ++define void @ctpop_v4i32(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: ctpop_v4i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vpcnt.w $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load <4 x i32>, ptr %src ++ %res = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %v) ++ store <4 x i32> %res, ptr %dst ++ ret void ++} ++ ++define void @ctpop_v2i64(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: ctpop_v2i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vpcnt.d $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load <2 x i64>, ptr %src ++ %res = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %v) ++ store <2 x i64> %res, ptr %dst ++ ret void ++} ++ ++define void @ctlz_v16i8(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: ctlz_v16i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vclz.b $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load <16 x i8>, ptr %src ++ %res = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %v, i1 false) ++ store <16 x i8> %res, ptr %dst ++ ret void ++} ++ ++define void @ctlz_v8i16(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: ctlz_v8i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vclz.h $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load <8 x i16>, ptr %src ++ %res = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %v, i1 false) ++ store <8 x i16> %res, ptr %dst ++ ret void ++} ++ ++define void @ctlz_v4i32(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: ctlz_v4i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vclz.w $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load <4 x i32>, ptr %src ++ %res = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %v, i1 false) ++ store <4 x i32> %res, ptr %dst ++ ret void ++} ++ ++define void @ctlz_v2i64(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: ctlz_v2i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vclz.d $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load <2 x i64>, ptr %src ++ %res = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %v, i1 false) ++ store <2 x i64> %res, ptr %dst ++ ret void ++} ++ ++declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) ++declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>) ++declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) ++declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) ++declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) ++declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) ++declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) ++declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) +-- +2.20.1 + diff --git a/0027-Clang-LoongArch-Fix-wrong-return-value-type-of-__ioc.patch b/0027-Clang-LoongArch-Fix-wrong-return-value-type-of-__ioc.patch new file mode 100644 index 0000000..c1a4c28 --- /dev/null +++ b/0027-Clang-LoongArch-Fix-wrong-return-value-type-of-__ioc.patch @@ -0,0 +1,72 @@ +From a18df7b21762bef413dfa2c9a2711860fc9678b3 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Wed, 6 Mar 2024 10:03:28 +0800 +Subject: [PATCH 27/27] [Clang][LoongArch] Fix wrong return value type of + __iocsrrd_h 
(#84100) + +relate: +https: //gcc.gnu.org/pipermail/gcc-patches/2024-February/645016.html +(cherry picked from commit 2f479b811274fede36535e34ecb545ac22e399c3) +(cherry picked from commit 9b9aee16d4dcf1b4af49988ebd7918fa4ce77e44) +--- + clang/lib/Headers/larchintrin.h | 2 +- + clang/test/CodeGen/LoongArch/intrinsic-la32.c | 8 ++++---- + clang/test/CodeGen/LoongArch/intrinsic-la64.c | 8 ++++---- + 3 files changed, 9 insertions(+), 9 deletions(-) + +diff --git a/clang/lib/Headers/larchintrin.h b/clang/lib/Headers/larchintrin.h +index c5c533ee0b8c..24dd29ce91ff 100644 +--- a/clang/lib/Headers/larchintrin.h ++++ b/clang/lib/Headers/larchintrin.h +@@ -156,7 +156,7 @@ extern __inline unsigned char + return (unsigned char)__builtin_loongarch_iocsrrd_b((unsigned int)_1); + } + +-extern __inline unsigned char ++extern __inline unsigned short + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __iocsrrd_h(unsigned int _1) { + return (unsigned short)__builtin_loongarch_iocsrrd_h((unsigned int)_1); +diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la32.c b/clang/test/CodeGen/LoongArch/intrinsic-la32.c +index 6a8d99880be3..eb3f8cbe7ac4 100644 +--- a/clang/test/CodeGen/LoongArch/intrinsic-la32.c ++++ b/clang/test/CodeGen/LoongArch/intrinsic-la32.c +@@ -215,11 +215,11 @@ void cacop_w(unsigned long int a) { + // LA32-LABEL: @iocsrrd_h_result( + // LA32-NEXT: entry: + // LA32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A:%.*]]) ++// LA32-NEXT: [[CONV_I:%.*]] = trunc i32 [[TMP0]] to i16 + // LA32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A]]) +-// LA32-NEXT: [[CONV2:%.*]] = and i32 [[TMP0]], 255 +-// LA32-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[CONV2]] +-// LA32-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD]] to i16 +-// LA32-NEXT: ret i16 [[CONV4]] ++// LA32-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ++// LA32-NEXT: [[CONV3:%.*]] = add i16 [[TMP2]], [[CONV_I]] ++// LA32-NEXT: ret i16 [[CONV3]] + // + unsigned short iocsrrd_h_result(unsigned int a) { + unsigned short b = __iocsrrd_h(a); +diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la64.c b/clang/test/CodeGen/LoongArch/intrinsic-la64.c +index 48b6a7a3d227..50ec358f546e 100644 +--- a/clang/test/CodeGen/LoongArch/intrinsic-la64.c ++++ b/clang/test/CodeGen/LoongArch/intrinsic-la64.c +@@ -431,11 +431,11 @@ void loongarch_movgr2fcsr(int a) { + // CHECK-LABEL: @iocsrrd_h_result( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A:%.*]]) ++// CHECK-NEXT: [[CONV_I:%.*]] = trunc i32 [[TMP0]] to i16 + // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A]]) +-// CHECK-NEXT: [[CONV2:%.*]] = and i32 [[TMP0]], 255 +-// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[CONV2]] +-// CHECK-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD]] to i16 +-// CHECK-NEXT: ret i16 [[CONV4]] ++// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ++// CHECK-NEXT: [[CONV3:%.*]] = add i16 [[TMP2]], [[CONV_I]] ++// CHECK-NEXT: ret i16 [[CONV3]] + // + unsigned short iocsrrd_h_result(unsigned int a) { + unsigned short b = __iocsrrd_h(a); +-- +2.20.1 + diff --git a/0027-LoongArch-Support-MULHS-MULHU-with-lsx-lasx.patch b/0027-LoongArch-Support-MULHS-MULHU-with-lsx-lasx.patch new file mode 100644 index 0000000..8e3e4ca --- /dev/null +++ b/0027-LoongArch-Support-MULHS-MULHU-with-lsx-lasx.patch @@ -0,0 +1,408 @@ +From 397fac3cd60fbbe17e8e99bfa79c6358b7d53df0 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Mon, 4 Dec 2023 10:44:39 +0800 +Subject: [PATCH 27/42] 
[LoongArch] Support MULHS/MULHU with lsx/lasx + +Mark MULHS/MULHU nodes as legal and adds the necessary patterns. + +(cherry picked from commit e9cd197d15300f186a5a32092103add65fbd3f50) + +--- + .../LoongArch/LoongArchISelLowering.cpp | 2 + + .../LoongArch/LoongArchLASXInstrInfo.td | 4 + + .../Target/LoongArch/LoongArchLSXInstrInfo.td | 4 + + llvm/test/CodeGen/LoongArch/lasx/mulh.ll | 162 ++++++++++++++++++ + llvm/test/CodeGen/LoongArch/lsx/mulh.ll | 162 ++++++++++++++++++ + 5 files changed, 334 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/mulh.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/mulh.ll + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index 358263b1a258..3d8d6898a4d5 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -248,6 +248,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal); + setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); + setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal); ++ setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal); + } + for (MVT VT : {MVT::v4f32, MVT::v2f64}) { + setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); +@@ -278,6 +279,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal); + setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); + setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal); ++ setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal); + } + for (MVT VT : {MVT::v8f32, MVT::v4f64}) { + setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index 960ac627578c..240f28b0dc5a 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -1217,6 +1217,10 @@ defm : PatXrUimm5; + // XVMUL_{B/H/W/D} + defm : PatXrXr; + ++// XVMUH_{B/H/W/D}[U] ++defm : PatXrXr; ++defm : PatXrXrU; ++ + // XVMADD_{B/H/W/D} + defm : PatXrXrXr; + // XVMSUB_{B/H/W/D} +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index 3480ade9eebf..fb4726c530b5 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -1294,6 +1294,10 @@ defm : PatVrUimm5; + // VMUL_{B/H/W/D} + defm : PatVrVr; + ++// VMUH_{B/H/W/D}[U] ++defm : PatVrVr; ++defm : PatVrVrU; ++ + // VMADD_{B/H/W/D} + defm : PatVrVrVr; + // VMSUB_{B/H/W/D} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/mulh.ll b/llvm/test/CodeGen/LoongArch/lasx/mulh.ll +new file mode 100644 +index 000000000000..aac711a4a371 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/mulh.ll +@@ -0,0 +1,162 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @mulhs_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mulhs_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvmuh.b $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %v0s = sext 
<32 x i8> %v0 to <32 x i16> ++ %v1s = sext <32 x i8> %v1 to <32 x i16> ++ %m = mul <32 x i16> %v0s, %v1s ++ %s = ashr <32 x i16> %m, ++ %v2 = trunc <32 x i16> %s to <32 x i8> ++ store <32 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @mulhu_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mulhu_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvmuh.bu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %v0z = zext <32 x i8> %v0 to <32 x i16> ++ %v1z = zext <32 x i8> %v1 to <32 x i16> ++ %m = mul <32 x i16> %v0z, %v1z ++ %s = lshr <32 x i16> %m, ++ %v2 = trunc <32 x i16> %s to <32 x i8> ++ store <32 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @mulhs_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mulhs_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvmuh.h $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %v0s = sext <16 x i16> %v0 to <16 x i32> ++ %v1s = sext <16 x i16> %v1 to <16 x i32> ++ %m = mul <16 x i32> %v0s, %v1s ++ %s = ashr <16 x i32> %m, ++ %v2 = trunc <16 x i32> %s to <16 x i16> ++ store <16 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @mulhu_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mulhu_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvmuh.hu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %v0z = zext <16 x i16> %v0 to <16 x i32> ++ %v1z = zext <16 x i16> %v1 to <16 x i32> ++ %m = mul <16 x i32> %v0z, %v1z ++ %s = lshr <16 x i32> %m, ++ %v2 = trunc <16 x i32> %s to <16 x i16> ++ store <16 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @mulhs_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mulhs_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvmuh.w $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %v0s = sext <8 x i32> %v0 to <8 x i64> ++ %v1s = sext <8 x i32> %v1 to <8 x i64> ++ %m = mul <8 x i64> %v0s, %v1s ++ %s = ashr <8 x i64> %m, ++ %v2 = trunc <8 x i64> %s to <8 x i32> ++ store <8 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @mulhu_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mulhu_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvmuh.wu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %v0z = zext <8 x i32> %v0 to <8 x i64> ++ %v1z = zext <8 x i32> %v1 to <8 x i64> ++ %m = mul <8 x i64> %v0z, %v1z ++ %s = lshr <8 x i64> %m, ++ %v2 = trunc <8 x i64> %s to <8 x i32> ++ store <8 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @mulhs_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mulhs_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvmuh.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret 
++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %v0s = sext <4 x i64> %v0 to <4 x i128> ++ %v1s = sext <4 x i64> %v1 to <4 x i128> ++ %m = mul <4 x i128> %v0s, %v1s ++ %s = ashr <4 x i128> %m, ++ %v2 = trunc <4 x i128> %s to <4 x i64> ++ store <4 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @mulhu_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mulhu_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvmuh.du $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %v0z = zext <4 x i64> %v0 to <4 x i128> ++ %v1z = zext <4 x i64> %v1 to <4 x i128> ++ %m = mul <4 x i128> %v0z, %v1z ++ %s = lshr <4 x i128> %m, ++ %v2 = trunc <4 x i128> %s to <4 x i64> ++ store <4 x i64> %v2, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/mulh.ll b/llvm/test/CodeGen/LoongArch/lsx/mulh.ll +new file mode 100644 +index 000000000000..e1388f00e355 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/mulh.ll +@@ -0,0 +1,162 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @mulhs_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mulhs_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vmuh.b $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %v0s = sext <16 x i8> %v0 to <16 x i16> ++ %v1s = sext <16 x i8> %v1 to <16 x i16> ++ %m = mul <16 x i16> %v0s, %v1s ++ %s = ashr <16 x i16> %m, ++ %v2 = trunc <16 x i16> %s to <16 x i8> ++ store <16 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @mulhu_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mulhu_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vmuh.bu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %v0z = zext <16 x i8> %v0 to <16 x i16> ++ %v1z = zext <16 x i8> %v1 to <16 x i16> ++ %m = mul <16 x i16> %v0z, %v1z ++ %s = lshr <16 x i16> %m, ++ %v2 = trunc <16 x i16> %s to <16 x i8> ++ store <16 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @mulhs_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mulhs_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vmuh.h $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %v0s = sext <8 x i16> %v0 to <8 x i32> ++ %v1s = sext <8 x i16> %v1 to <8 x i32> ++ %m = mul <8 x i32> %v0s, %v1s ++ %s = ashr <8 x i32> %m, ++ %v2 = trunc <8 x i32> %s to <8 x i16> ++ store <8 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @mulhu_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mulhu_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vmuh.hu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %v0z = zext <8 x i16> %v0 to <8 x i32> ++ %v1z = zext <8 x i16> %v1 to <8 x i32> ++ %m = 
mul <8 x i32> %v0z, %v1z ++ %s = lshr <8 x i32> %m, ++ %v2 = trunc <8 x i32> %s to <8 x i16> ++ store <8 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @mulhs_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mulhs_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vmuh.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %v0s = sext <4 x i32> %v0 to <4 x i64> ++ %v1s = sext <4 x i32> %v1 to <4 x i64> ++ %m = mul <4 x i64> %v0s, %v1s ++ %s = ashr <4 x i64> %m, ++ %v2 = trunc <4 x i64> %s to <4 x i32> ++ store <4 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @mulhu_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mulhu_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vmuh.wu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %v0z = zext <4 x i32> %v0 to <4 x i64> ++ %v1z = zext <4 x i32> %v1 to <4 x i64> ++ %m = mul <4 x i64> %v0z, %v1z ++ %s = lshr <4 x i64> %m, ++ %v2 = trunc <4 x i64> %s to <4 x i32> ++ store <4 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @mulhs_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mulhs_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vmuh.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %v0s = sext <2 x i64> %v0 to <2 x i128> ++ %v1s = sext <2 x i64> %v1 to <2 x i128> ++ %m = mul <2 x i128> %v0s, %v1s ++ %s = ashr <2 x i128> %m, ++ %v2 = trunc <2 x i128> %s to <2 x i64> ++ store <2 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @mulhu_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mulhu_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vmuh.du $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %v0z = zext <2 x i64> %v0 to <2 x i128> ++ %v1z = zext <2 x i64> %v1 to <2 x i128> ++ %m = mul <2 x i128> %v0z, %v1z ++ %s = lshr <2 x i128> %m, ++ %v2 = trunc <2 x i128> %s to <2 x i64> ++ store <2 x i64> %v2, ptr %res ++ ret void ++} +-- +2.20.1 + diff --git a/0028-LoongArch-Make-ISD-VSELECT-a-legal-operation-with-ls.patch b/0028-LoongArch-Make-ISD-VSELECT-a-legal-operation-with-ls.patch new file mode 100644 index 0000000..a8510a8 --- /dev/null +++ b/0028-LoongArch-Make-ISD-VSELECT-a-legal-operation-with-ls.patch @@ -0,0 +1,273 @@ +From 363970a20c51d39e1406fa7f516d95142024779d Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Wed, 6 Dec 2023 16:43:38 +0800 +Subject: [PATCH 28/42] [LoongArch] Make ISD::VSELECT a legal operation with + lsx/lasx + +(cherry picked from commit de21308f78f3b0f0910638dbdac90967150d19f0) + +--- + .../LoongArch/LoongArchISelLowering.cpp | 5 ++ + .../LoongArch/LoongArchLASXInstrInfo.td | 8 ++ + .../Target/LoongArch/LoongArchLSXInstrInfo.td | 8 ++ + llvm/test/CodeGen/LoongArch/lasx/vselect.ll | 86 +++++++++++++++++++ + llvm/test/CodeGen/LoongArch/lsx/vselect.ll | 86 +++++++++++++++++++ + 5 files changed, 193 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/vselect.ll + create mode 100644 
llvm/test/CodeGen/LoongArch/lsx/vselect.ll + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index 3d8d6898a4d5..229251987ae4 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -237,6 +237,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); ++ ++ setOperationAction(ISD::VSELECT, VT, Legal); + } + for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); +@@ -268,6 +270,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); ++ ++ setOperationAction(ISD::VSELECT, VT, Legal); + } + for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) { + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); +@@ -305,6 +309,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setStackPointerRegisterToSaveRestore(LoongArch::R3); + + setBooleanContents(ZeroOrOneBooleanContent); ++ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); + + setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen()); + +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index 240f28b0dc5a..0bd8db1bfdf0 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -1480,6 +1480,14 @@ def : Pat<(f32 (vector_extract v8f32:$xj, i64:$rk)), + def : Pat<(f64 (vector_extract v4f64:$xj, i64:$rk)), + (f64 (EXTRACT_SUBREG (XVREPLVE_D v4f64:$xj, i64:$rk), sub_64))>; + ++// vselect ++def : Pat<(v32i8 (vselect LASX256:$xj, LASX256:$xd, ++ (v32i8 (SplatPat_uimm8 uimm8:$imm)))), ++ (XVBITSELI_B LASX256:$xd, LASX256:$xj, uimm8:$imm)>; ++foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in ++ def : Pat<(vt (vselect LASX256:$xa, LASX256:$xk, LASX256:$xj)), ++ (XVBITSEL_V LASX256:$xj, LASX256:$xk, LASX256:$xa)>; ++ + } // Predicates = [HasExtLASX] + + /// Intrinsic pattern +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index fb4726c530b5..5800ff6f6266 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -1564,6 +1564,14 @@ def : Pat<(f32 (vector_extract v4f32:$vj, i64:$rk)), + def : Pat<(f64 (vector_extract v2f64:$vj, i64:$rk)), + (f64 (EXTRACT_SUBREG (VREPLVE_D v2f64:$vj, i64:$rk), sub_64))>; + ++// vselect ++def : Pat<(v16i8 (vselect LSX128:$vj, LSX128:$vd, ++ (v16i8 (SplatPat_uimm8 uimm8:$imm)))), ++ (VBITSELI_B LSX128:$vd, LSX128:$vj, uimm8:$imm)>; ++foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in ++ def : Pat<(vt (vselect LSX128:$va, LSX128:$vk, LSX128:$vj)), ++ (VBITSEL_V LSX128:$vj, LSX128:$vk, LSX128:$va)>; ++ + } // Predicates = [HasExtLSX] + + /// Intrinsic pattern +diff --git a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll +new file mode 100644 +index 000000000000..24f4bcf752d3 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll +@@ -0,0 +1,86 @@ ++; NOTE: 
Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @select_v32i8_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: select_v32i8_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvrepli.h $xr1, -256 ++; CHECK-NEXT: xvbitseli.b $xr0, $xr1, 1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <32 x i8>, ptr %a0 ++ %sel = select <32 x i1> , <32 x i8> %v0, <32 x i8> ++ store <32 x i8> %sel, ptr %res ++ ret void ++} ++ ++define void @select_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: select_v32i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvrepli.h $xr2, -256 ++; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %sel = select <32 x i1> , <32 x i8> %v0, <32 x i8> %v1 ++ store <32 x i8> %sel, ptr %res ++ ret void ++} ++ ++define void @select_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: select_v16i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: lu12i.w $a1, -16 ++; CHECK-NEXT: lu32i.d $a1, 0 ++; CHECK-NEXT: xvreplgr2vr.w $xr2, $a1 ++; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %sel = select <16 x i1> , <16 x i16> %v0, <16 x i16> %v1 ++ store <16 x i16> %sel, ptr %res ++ ret void ++} ++ ++define void @select_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: select_v8i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: ori $a1, $zero, 0 ++; CHECK-NEXT: lu32i.d $a1, -1 ++; CHECK-NEXT: xvreplgr2vr.d $xr2, $a1 ++; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %sel = select <8 x i1> , <8 x i32> %v0, <8 x i32> %v1 ++ store <8 x i32> %sel, ptr %res ++ ret void ++} ++ ++define void @select_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: select_v4i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_0) ++; CHECK-NEXT: addi.d $a3, $a3, %pc_lo12(.LCPI4_0) ++; CHECK-NEXT: xvld $xr0, $a3, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvld $xr2, $a2, 0 ++; CHECK-NEXT: xvbitsel.v $xr0, $xr2, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %sel = select <4 x i1> , <4 x i64> %v0, <4 x i64> %v1 ++ store <4 x i64> %sel, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll +new file mode 100644 +index 000000000000..00e3d9313f13 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll +@@ -0,0 +1,86 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @select_v16i8_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: select_v16i8_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vrepli.h $vr1, -256 ++; CHECK-NEXT: vbitseli.b $vr0, $vr1, 255 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i8>, ptr %a0 ++ %sel = select <16 x i1> , 
<16 x i8> %v0, <16 x i8> ++ store <16 x i8> %sel, ptr %res ++ ret void ++} ++ ++define void @select_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: select_v16i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vrepli.h $vr2, -256 ++; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %sel = select <16 x i1> , <16 x i8> %v0, <16 x i8> %v1 ++ store <16 x i8> %sel, ptr %res ++ ret void ++} ++ ++define void @select_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: select_v8i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: lu12i.w $a1, -16 ++; CHECK-NEXT: lu32i.d $a1, 0 ++; CHECK-NEXT: vreplgr2vr.w $vr2, $a1 ++; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %sel = select <8 x i1> , <8 x i16> %v0, <8 x i16> %v1 ++ store <8 x i16> %sel, ptr %res ++ ret void ++} ++ ++define void @select_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: select_v4i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: ori $a1, $zero, 0 ++; CHECK-NEXT: lu32i.d $a1, -1 ++; CHECK-NEXT: vreplgr2vr.d $vr2, $a1 ++; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %sel = select <4 x i1> , <4 x i32> %v0, <4 x i32> %v1 ++ store <4 x i32> %sel, ptr %res ++ ret void ++} ++ ++define void @select_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: select_v2i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_0) ++; CHECK-NEXT: addi.d $a3, $a3, %pc_lo12(.LCPI4_0) ++; CHECK-NEXT: vld $vr0, $a3, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vld $vr2, $a2, 0 ++; CHECK-NEXT: vbitsel.v $vr0, $vr2, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %sel = select <2 x i1> , <2 x i64> %v0, <2 x i64> %v1 ++ store <2 x i64> %sel, ptr %res ++ ret void ++} +-- +2.20.1 + diff --git a/0029-LoongArch-Add-codegen-support-for-icmp-fcmp-with-lsx.patch b/0029-LoongArch-Add-codegen-support-for-icmp-fcmp-with-lsx.patch new file mode 100644 index 0000000..c63b641 --- /dev/null +++ b/0029-LoongArch-Add-codegen-support-for-icmp-fcmp-with-lsx.patch @@ -0,0 +1,3596 @@ +From f82f71d8a0e678db830c4f00f4cb27e824d2a536 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Thu, 7 Dec 2023 20:11:43 +0800 +Subject: [PATCH 29/42] [LoongArch] Add codegen support for icmp/fcmp with + lsx/lasx fetaures (#74700) + +Mark ISD::SETCC node as legal, and add handling for the vector types +condition codes. 
+ +(cherry picked from commit 9ff7d0ebeb54347f9006405a6d08ed2b713bc411) + +--- + .../LoongArch/LoongArchISelLowering.cpp | 14 + + .../LoongArch/LoongArchLASXInstrInfo.td | 95 ++ + .../Target/LoongArch/LoongArchLSXInstrInfo.td | 95 ++ + .../LoongArch/lasx/ir-instruction/fcmp.ll | 692 +++++++++++++ + .../LoongArch/lasx/ir-instruction/icmp.ll | 939 ++++++++++++++++++ + .../LoongArch/lsx/ir-instruction/fcmp.ll | 692 +++++++++++++ + .../LoongArch/lsx/ir-instruction/icmp.ll | 939 ++++++++++++++++++ + 7 files changed, 3466 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index 229251987ae4..3d5ae6d3deda 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -238,6 +238,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + ++ setOperationAction(ISD::SETCC, VT, Legal); + setOperationAction(ISD::VSELECT, VT, Legal); + } + for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { +@@ -251,11 +252,17 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); + setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal); + setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal); ++ setCondCodeAction( ++ {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, ++ Expand); + } + for (MVT VT : {MVT::v4f32, MVT::v2f64}) { + setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); + setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); + setOperationAction(ISD::FMA, VT, Legal); ++ setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, ++ ISD::SETUGE, ISD::SETUGT}, ++ VT, Expand); + } + } + +@@ -271,6 +278,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + ++ setOperationAction(ISD::SETCC, VT, Legal); + setOperationAction(ISD::VSELECT, VT, Legal); + } + for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) { +@@ -284,11 +292,17 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); + setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal); + setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal); ++ setCondCodeAction( ++ {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, ++ Expand); + } + for (MVT VT : {MVT::v8f32, MVT::v4f64}) { + setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); + setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); + setOperationAction(ISD::FMA, VT, Legal); ++ setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, ++ ISD::SETUGE, ISD::SETUGT}, ++ VT, Expand); + } + } + +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index 0bd8db1bfdf0..a9bf65c6840d 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ 
b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -1184,6 +1184,65 @@ multiclass PatShiftXrUimm { + (!cast(Inst#"_D") LASX256:$xj, uimm6:$imm)>; + } + ++multiclass PatCCXrSimm5 { ++ def : Pat<(v32i8 (setcc (v32i8 LASX256:$xj), ++ (v32i8 (SplatPat_simm5 simm5:$imm)), CC)), ++ (!cast(Inst#"_B") LASX256:$xj, simm5:$imm)>; ++ def : Pat<(v16i16 (setcc (v16i16 LASX256:$xj), ++ (v16i16 (SplatPat_simm5 simm5:$imm)), CC)), ++ (!cast(Inst#"_H") LASX256:$xj, simm5:$imm)>; ++ def : Pat<(v8i32 (setcc (v8i32 LASX256:$xj), ++ (v8i32 (SplatPat_simm5 simm5:$imm)), CC)), ++ (!cast(Inst#"_W") LASX256:$xj, simm5:$imm)>; ++ def : Pat<(v4i64 (setcc (v4i64 LASX256:$xj), ++ (v4i64 (SplatPat_simm5 simm5:$imm)), CC)), ++ (!cast(Inst#"_D") LASX256:$xj, simm5:$imm)>; ++} ++ ++multiclass PatCCXrUimm5 { ++ def : Pat<(v32i8 (setcc (v32i8 LASX256:$xj), ++ (v32i8 (SplatPat_uimm5 uimm5:$imm)), CC)), ++ (!cast(Inst#"_BU") LASX256:$xj, uimm5:$imm)>; ++ def : Pat<(v16i16 (setcc (v16i16 LASX256:$xj), ++ (v16i16 (SplatPat_uimm5 uimm5:$imm)), CC)), ++ (!cast(Inst#"_HU") LASX256:$xj, uimm5:$imm)>; ++ def : Pat<(v8i32 (setcc (v8i32 LASX256:$xj), ++ (v8i32 (SplatPat_uimm5 uimm5:$imm)), CC)), ++ (!cast(Inst#"_WU") LASX256:$xj, uimm5:$imm)>; ++ def : Pat<(v4i64 (setcc (v4i64 LASX256:$xj), ++ (v4i64 (SplatPat_uimm5 uimm5:$imm)), CC)), ++ (!cast(Inst#"_DU") LASX256:$xj, uimm5:$imm)>; ++} ++ ++multiclass PatCCXrXr { ++ def : Pat<(v32i8 (setcc (v32i8 LASX256:$xj), (v32i8 LASX256:$xk), CC)), ++ (!cast(Inst#"_B") LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(v16i16 (setcc (v16i16 LASX256:$xj), (v16i16 LASX256:$xk), CC)), ++ (!cast(Inst#"_H") LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(v8i32 (setcc (v8i32 LASX256:$xj), (v8i32 LASX256:$xk), CC)), ++ (!cast(Inst#"_W") LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(v4i64 (setcc (v4i64 LASX256:$xj), (v4i64 LASX256:$xk), CC)), ++ (!cast(Inst#"_D") LASX256:$xj, LASX256:$xk)>; ++} ++ ++multiclass PatCCXrXrU { ++ def : Pat<(v32i8 (setcc (v32i8 LASX256:$xj), (v32i8 LASX256:$xk), CC)), ++ (!cast(Inst#"_BU") LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(v16i16 (setcc (v16i16 LASX256:$xj), (v16i16 LASX256:$xk), CC)), ++ (!cast(Inst#"_HU") LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(v8i32 (setcc (v8i32 LASX256:$xj), (v8i32 LASX256:$xk), CC)), ++ (!cast(Inst#"_WU") LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(v4i64 (setcc (v4i64 LASX256:$xj), (v4i64 LASX256:$xk), CC)), ++ (!cast(Inst#"_DU") LASX256:$xj, LASX256:$xk)>; ++} ++ ++multiclass PatCCXrXrF { ++ def : Pat<(v8i32 (setcc (v8f32 LASX256:$xj), (v8f32 LASX256:$xk), CC)), ++ (!cast(Inst#"_S") LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(v4i64 (setcc (v4f64 LASX256:$xj), (v4f64 LASX256:$xk), CC)), ++ (!cast(Inst#"_D") LASX256:$xj, LASX256:$xk)>; ++} ++ + let Predicates = [HasExtLASX] in { + + // XVADD_{B/H/W/D} +@@ -1389,6 +1448,42 @@ def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa), + def : Pat<(fma v4f64:$xj, v4f64:$xk, v4f64:$xa), + (XVFMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; + ++// XVSEQ[I]_{B/H/W/D} ++defm : PatCCXrSimm5; ++defm : PatCCXrXr; ++ ++// XVSLE[I]_{B/H/W/D}[U] ++defm : PatCCXrSimm5; ++defm : PatCCXrUimm5; ++defm : PatCCXrXr; ++defm : PatCCXrXrU; ++ ++// XVSLT[I]_{B/H/W/D}[U] ++defm : PatCCXrSimm5; ++defm : PatCCXrUimm5; ++defm : PatCCXrXr; ++defm : PatCCXrXrU; ++ ++// XVFCMP.cond.{S/D} ++defm : PatCCXrXrF; ++defm : PatCCXrXrF; ++defm : PatCCXrXrF; ++ ++defm : PatCCXrXrF; ++defm : PatCCXrXrF; ++defm : PatCCXrXrF; ++ ++defm : PatCCXrXrF; ++defm : PatCCXrXrF; ++defm : PatCCXrXrF; ++ ++defm : PatCCXrXrF; ++defm : PatCCXrXrF; ++defm : PatCCXrXrF; ++ ++defm 
: PatCCXrXrF; ++defm : PatCCXrXrF; ++ + // PseudoXVINSGR2VR_{B/H} + def : Pat<(vector_insert v32i8:$xd, GRLenVT:$rj, uimm5:$imm), + (PseudoXVINSGR2VR_B v32i8:$xd, GRLenVT:$rj, uimm5:$imm)>; +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index 5800ff6f6266..ff21c6681271 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -1261,6 +1261,65 @@ multiclass PatShiftVrUimm { + (!cast(Inst#"_D") LSX128:$vj, uimm6:$imm)>; + } + ++multiclass PatCCVrSimm5 { ++ def : Pat<(v16i8 (setcc (v16i8 LSX128:$vj), ++ (v16i8 (SplatPat_simm5 simm5:$imm)), CC)), ++ (!cast(Inst#"_B") LSX128:$vj, simm5:$imm)>; ++ def : Pat<(v8i16 (setcc (v8i16 LSX128:$vj), ++ (v8i16 (SplatPat_simm5 simm5:$imm)), CC)), ++ (!cast(Inst#"_H") LSX128:$vj, simm5:$imm)>; ++ def : Pat<(v4i32 (setcc (v4i32 LSX128:$vj), ++ (v4i32 (SplatPat_simm5 simm5:$imm)), CC)), ++ (!cast(Inst#"_W") LSX128:$vj, simm5:$imm)>; ++ def : Pat<(v2i64 (setcc (v2i64 LSX128:$vj), ++ (v2i64 (SplatPat_simm5 simm5:$imm)), CC)), ++ (!cast(Inst#"_D") LSX128:$vj, simm5:$imm)>; ++} ++ ++multiclass PatCCVrUimm5 { ++ def : Pat<(v16i8 (setcc (v16i8 LSX128:$vj), ++ (v16i8 (SplatPat_uimm5 uimm5:$imm)), CC)), ++ (!cast(Inst#"_BU") LSX128:$vj, uimm5:$imm)>; ++ def : Pat<(v8i16 (setcc (v8i16 LSX128:$vj), ++ (v8i16 (SplatPat_uimm5 uimm5:$imm)), CC)), ++ (!cast(Inst#"_HU") LSX128:$vj, uimm5:$imm)>; ++ def : Pat<(v4i32 (setcc (v4i32 LSX128:$vj), ++ (v4i32 (SplatPat_uimm5 uimm5:$imm)), CC)), ++ (!cast(Inst#"_WU") LSX128:$vj, uimm5:$imm)>; ++ def : Pat<(v2i64 (setcc (v2i64 LSX128:$vj), ++ (v2i64 (SplatPat_uimm5 uimm5:$imm)), CC)), ++ (!cast(Inst#"_DU") LSX128:$vj, uimm5:$imm)>; ++} ++ ++multiclass PatCCVrVr { ++ def : Pat<(v16i8 (setcc (v16i8 LSX128:$vj), (v16i8 LSX128:$vk), CC)), ++ (!cast(Inst#"_B") LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(v8i16 (setcc (v8i16 LSX128:$vj), (v8i16 LSX128:$vk), CC)), ++ (!cast(Inst#"_H") LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(v4i32 (setcc (v4i32 LSX128:$vj), (v4i32 LSX128:$vk), CC)), ++ (!cast(Inst#"_W") LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(v2i64 (setcc (v2i64 LSX128:$vj), (v2i64 LSX128:$vk), CC)), ++ (!cast(Inst#"_D") LSX128:$vj, LSX128:$vk)>; ++} ++ ++multiclass PatCCVrVrU { ++ def : Pat<(v16i8 (setcc (v16i8 LSX128:$vj), (v16i8 LSX128:$vk), CC)), ++ (!cast(Inst#"_BU") LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(v8i16 (setcc (v8i16 LSX128:$vj), (v8i16 LSX128:$vk), CC)), ++ (!cast(Inst#"_HU") LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(v4i32 (setcc (v4i32 LSX128:$vj), (v4i32 LSX128:$vk), CC)), ++ (!cast(Inst#"_WU") LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(v2i64 (setcc (v2i64 LSX128:$vj), (v2i64 LSX128:$vk), CC)), ++ (!cast(Inst#"_DU") LSX128:$vj, LSX128:$vk)>; ++} ++ ++multiclass PatCCVrVrF { ++ def : Pat<(v4i32 (setcc (v4f32 LSX128:$vj), (v4f32 LSX128:$vk), CC)), ++ (!cast(Inst#"_S") LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(v2i64 (setcc (v2f64 LSX128:$vj), (v2f64 LSX128:$vk), CC)), ++ (!cast(Inst#"_D") LSX128:$vj, LSX128:$vk)>; ++} ++ + let Predicates = [HasExtLSX] in { + + // VADD_{B/H/W/D} +@@ -1466,6 +1525,42 @@ def : Pat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va), + def : Pat<(fma v2f64:$vj, v2f64:$vk, v2f64:$va), + (VFMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; + ++// VSEQ[I]_{B/H/W/D} ++defm : PatCCVrSimm5; ++defm : PatCCVrVr; ++ ++// VSLE[I]_{B/H/W/D}[U] ++defm : PatCCVrSimm5; ++defm : PatCCVrUimm5; ++defm : PatCCVrVr; ++defm : PatCCVrVrU; ++ ++// VSLT[I]_{B/H/W/D}[U] ++defm : PatCCVrSimm5; ++defm : PatCCVrUimm5; 
++defm : PatCCVrVr; ++defm : PatCCVrVrU; ++ ++// VFCMP.cond.{S/D} ++defm : PatCCVrVrF; ++defm : PatCCVrVrF; ++defm : PatCCVrVrF; ++ ++defm : PatCCVrVrF; ++defm : PatCCVrVrF; ++defm : PatCCVrVrF; ++ ++defm : PatCCVrVrF; ++defm : PatCCVrVrF; ++defm : PatCCVrVrF; ++ ++defm : PatCCVrVrF; ++defm : PatCCVrVrF; ++defm : PatCCVrVrF; ++ ++defm : PatCCVrVrF; ++defm : PatCCVrVrF; ++ + // VINSGR2VR_{B/H/W/D} + def : Pat<(vector_insert v16i8:$vd, GRLenVT:$rj, uimm4:$imm), + (VINSGR2VR_B v16i8:$vd, GRLenVT:$rj, uimm4:$imm)>; +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll +new file mode 100644 +index 000000000000..ef67dbc100c0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll +@@ -0,0 +1,692 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++;; TREU ++define void @v8f32_fcmp_true(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_true: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvrepli.b $xr0, -1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp true <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++;; FALSE ++define void @v4f64_fcmp_false(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_false: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvrepli.b $xr0, 0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp false <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETOEQ ++define void @v8f32_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_oeq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.ceq.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp oeq <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_oeq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.ceq.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp oeq <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETUEQ ++define void @v8f32_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_ueq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cueq.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp ueq <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_ueq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cueq.d $xr0, 
$xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp ueq <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETEQ ++define void @v8f32_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_eq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.ceq.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp fast oeq <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_eq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.ceq.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp fast ueq <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETOLE ++define void @v8f32_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_ole: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cle.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp ole <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_ole: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cle.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp ole <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETULE ++define void @v8f32_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_ule: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cule.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp ule <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_ule: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cule.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp ule <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETLE ++define void @v8f32_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_le: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cle.s $xr0, $xr1, $xr0 ++; 
CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp fast ole <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_le: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cle.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp fast ule <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETOLT ++define void @v8f32_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_olt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp olt <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_olt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp olt <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETULT ++define void @v8f32_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_ult: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cult.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp ult <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_ult: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cult.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp ult <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETLT ++define void @v8f32_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_lt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp fast olt <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_lt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; 
CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp fast ult <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETONE ++define void @v8f32_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_one: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cne.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp one <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_one: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cne.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp one <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETUNE ++define void @v8f32_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_une: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cune.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp une <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_une: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cune.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp une <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETNE ++define void @v8f32_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_ne: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cne.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp fast one <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_ne: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cne.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp fast une <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETO ++define void @v8f32_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_ord: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cor.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ 
%v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp ord <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_ord: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cor.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp ord <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETUO ++define void @v8f32_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_uno: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cun.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp uno <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_uno: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cun.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp uno <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETOGT ++define void @v8f32_fcmp_ogt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_ogt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp ogt <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_ogt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_ogt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp ogt <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETUGT ++define void @v8f32_fcmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_ugt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvfcmp.cult.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp ugt <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_ugt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvfcmp.cult.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr 
%a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp ugt <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETGT ++define void @v8f32_fcmp_gt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_gt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp fast ogt <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_gt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_gt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp fast ugt <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETOGE ++define void @v8f32_fcmp_oge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_oge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvfcmp.cle.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp oge <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_oge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_oge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvfcmp.cle.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp oge <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETUGE ++define void @v8f32_fcmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_uge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvfcmp.cule.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp uge <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_uge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvfcmp.cule.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp uge <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETGE ++define void @v8f32_fcmp_ge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_ge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvfcmp.cle.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, 
ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp fast oge <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_ge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_ge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvfcmp.cle.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp fast uge <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll +new file mode 100644 +index 000000000000..6693fe0f6ec7 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll +@@ -0,0 +1,939 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++;; SETEQ ++define void @v32i8_icmp_eq_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v32i8_icmp_eq_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvseqi.b $xr0, $xr0, 15 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <32 x i8>, ptr %a0 ++ %cmp = icmp eq <32 x i8> %v0, ++ %ext = sext <32 x i1> %cmp to <32 x i8> ++ store <32 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v32i8_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v32i8_icmp_eq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvseq.b $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %cmp = icmp eq <32 x i8> %v0, %v1 ++ %ext = sext <32 x i1> %cmp to <32 x i8> ++ store <32 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i16_icmp_eq_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v16i16_icmp_eq_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvseqi.h $xr0, $xr0, 15 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i16>, ptr %a0 ++ %cmp = icmp eq <16 x i16> %v0, ++ %ext = sext <16 x i1> %cmp to <16 x i16> ++ store <16 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i16_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i16_icmp_eq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvseq.h $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %cmp = icmp eq <16 x i16> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i16> ++ store <16 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i32_icmp_eq_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v8i32_icmp_eq_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvseqi.w $xr0, $xr0, 15 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %a0 ++ %cmp = icmp eq <8 x i32> %v0, ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i32_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i32_icmp_eq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvseq.w $xr0, $xr1, 
$xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %cmp = icmp eq <8 x i32> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i64_icmp_eq_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v4i64_icmp_eq_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvseqi.d $xr0, $xr0, 15 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %a0 ++ %cmp = icmp eq <4 x i64> %v0, ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i64_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i64_icmp_eq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvseq.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %cmp = icmp eq <4 x i64> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETLE ++define void @v32i8_icmp_sle_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v32i8_icmp_sle_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslei.b $xr0, $xr0, 15 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <32 x i8>, ptr %a0 ++ %cmp = icmp sle <32 x i8> %v0, ++ %ext = sext <32 x i1> %cmp to <32 x i8> ++ store <32 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v32i8_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v32i8_icmp_sle: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsle.b $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %cmp = icmp sle <32 x i8> %v0, %v1 ++ %ext = sext <32 x i1> %cmp to <32 x i8> ++ store <32 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i16_icmp_sle_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v16i16_icmp_sle_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslei.h $xr0, $xr0, 15 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i16>, ptr %a0 ++ %cmp = icmp sle <16 x i16> %v0, ++ %ext = sext <16 x i1> %cmp to <16 x i16> ++ store <16 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i16_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i16_icmp_sle: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsle.h $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %cmp = icmp sle <16 x i16> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i16> ++ store <16 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i32_icmp_sle_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v8i32_icmp_sle_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslei.w $xr0, $xr0, 15 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %a0 ++ %cmp = icmp sle <8 x i32> %v0, ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i32_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i32_icmp_sle: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld 
$xr1, $a1, 0 ++; CHECK-NEXT: xvsle.w $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %cmp = icmp sle <8 x i32> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i64_icmp_sle_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v4i64_icmp_sle_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslei.d $xr0, $xr0, 15 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %a0 ++ %cmp = icmp sle <4 x i64> %v0, ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i64_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i64_icmp_sle: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsle.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %cmp = icmp sle <4 x i64> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETULE ++define void @v32i8_icmp_ule_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v32i8_icmp_ule_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslei.bu $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <32 x i8>, ptr %a0 ++ %cmp = icmp ule <32 x i8> %v0, ++ %ext = sext <32 x i1> %cmp to <32 x i8> ++ store <32 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v32i8_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v32i8_icmp_ule: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsle.bu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %cmp = icmp ule <32 x i8> %v0, %v1 ++ %ext = sext <32 x i1> %cmp to <32 x i8> ++ store <32 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i16_icmp_ule_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v16i16_icmp_ule_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslei.hu $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i16>, ptr %a0 ++ %cmp = icmp ule <16 x i16> %v0, ++ %ext = sext <16 x i1> %cmp to <16 x i16> ++ store <16 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i16_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i16_icmp_ule: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsle.hu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %cmp = icmp ule <16 x i16> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i16> ++ store <16 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i32_icmp_ule_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v8i32_icmp_ule_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslei.wu $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %a0 ++ %cmp = icmp ule <8 x i32> %v0, ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i32_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i32_icmp_ule: ++; CHECK: # 
%bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsle.wu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %cmp = icmp ule <8 x i32> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i64_icmp_ule_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v4i64_icmp_ule_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslei.du $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %a0 ++ %cmp = icmp ule <4 x i64> %v0, ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i64_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i64_icmp_ule: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsle.du $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %cmp = icmp ule <4 x i64> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETLT ++define void @v32i8_icmp_slt_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v32i8_icmp_slt_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslti.b $xr0, $xr0, 15 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <32 x i8>, ptr %a0 ++ %cmp = icmp slt <32 x i8> %v0, ++ %ext = sext <32 x i1> %cmp to <32 x i8> ++ store <32 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v32i8_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v32i8_icmp_slt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvslt.b $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %cmp = icmp slt <32 x i8> %v0, %v1 ++ %ext = sext <32 x i1> %cmp to <32 x i8> ++ store <32 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i16_icmp_slt_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v16i16_icmp_slt_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslti.h $xr0, $xr0, 15 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i16>, ptr %a0 ++ %cmp = icmp slt <16 x i16> %v0, ++ %ext = sext <16 x i1> %cmp to <16 x i16> ++ store <16 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i16_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i16_icmp_slt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvslt.h $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %cmp = icmp slt <16 x i16> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i16> ++ store <16 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i32_icmp_slt_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v8i32_icmp_slt_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslti.w $xr0, $xr0, 15 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %a0 ++ %cmp = icmp slt <8 x i32> %v0, ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i32_icmp_slt(ptr %res, ptr %a0, ptr %a1) 
nounwind { ++; CHECK-LABEL: v8i32_icmp_slt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvslt.w $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %cmp = icmp slt <8 x i32> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i64_icmp_slt_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v4i64_icmp_slt_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslti.d $xr0, $xr0, 15 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %a0 ++ %cmp = icmp slt <4 x i64> %v0, ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i64_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i64_icmp_slt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvslt.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %cmp = icmp slt <4 x i64> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETULT ++define void @v32i8_icmp_ult_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v32i8_icmp_ult_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslti.bu $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <32 x i8>, ptr %a0 ++ %cmp = icmp ult <32 x i8> %v0, ++ %ext = sext <32 x i1> %cmp to <32 x i8> ++ store <32 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v32i8_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v32i8_icmp_ult: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvslt.bu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %cmp = icmp ult <32 x i8> %v0, %v1 ++ %ext = sext <32 x i1> %cmp to <32 x i8> ++ store <32 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i16_icmp_ult_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v16i16_icmp_ult_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslti.hu $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i16>, ptr %a0 ++ %cmp = icmp ult <16 x i16> %v0, ++ %ext = sext <16 x i1> %cmp to <16 x i16> ++ store <16 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i16_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i16_icmp_ult: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvslt.hu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %cmp = icmp ult <16 x i16> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i16> ++ store <16 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i32_icmp_ult_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v8i32_icmp_ult_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslti.wu $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %a0 ++ %cmp = icmp ult <8 x i32> %v0, ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} 
++ ++define void @v8i32_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i32_icmp_ult: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvslt.wu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %cmp = icmp ult <8 x i32> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i64_icmp_ult_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v4i64_icmp_ult_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslti.du $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %a0 ++ %cmp = icmp ult <4 x i64> %v0, ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i64_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i64_icmp_ult: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvslt.du $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %cmp = icmp ult <4 x i64> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETNE ++define void @v32i8_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v32i8_icmp_ne: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvseq.b $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvxori.b $xr0, $xr0, 255 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %cmp = icmp ne <32 x i8> %v0, %v1 ++ %ext = sext <32 x i1> %cmp to <32 x i8> ++ store <32 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i16_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i16_icmp_ne: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvseq.h $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvrepli.b $xr1, -1 ++; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %cmp = icmp ne <16 x i16> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i16> ++ store <16 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i32_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i32_icmp_ne: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvseq.w $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvrepli.b $xr1, -1 ++; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %cmp = icmp ne <8 x i32> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i64_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i64_icmp_ne: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvseq.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvrepli.b $xr1, -1 ++; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %cmp = icmp ne <4 x i64> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ 
store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETGE ++define void @v32i8_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v32i8_icmp_sge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvsle.b $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %cmp = icmp sge <32 x i8> %v0, %v1 ++ %ext = sext <32 x i1> %cmp to <32 x i8> ++ store <32 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i16_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i16_icmp_sge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvsle.h $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %cmp = icmp sge <16 x i16> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i16> ++ store <16 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i32_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i32_icmp_sge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvsle.w $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %cmp = icmp sge <8 x i32> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i64_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i64_icmp_sge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvsle.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %cmp = icmp sge <4 x i64> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETUGE ++define void @v32i8_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v32i8_icmp_uge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvsle.bu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %cmp = icmp uge <32 x i8> %v0, %v1 ++ %ext = sext <32 x i1> %cmp to <32 x i8> ++ store <32 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i16_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i16_icmp_uge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvsle.hu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %cmp = icmp uge <16 x i16> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i16> ++ store <16 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i32_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i32_icmp_uge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvsle.wu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %cmp = icmp uge <8 x i32> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i64_icmp_uge(ptr %res, ptr %a0, ptr %a1) 
nounwind { ++; CHECK-LABEL: v4i64_icmp_uge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvsle.du $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %cmp = icmp uge <4 x i64> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETGT ++define void @v32i8_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v32i8_icmp_sgt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvslt.b $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %cmp = icmp sgt <32 x i8> %v0, %v1 ++ %ext = sext <32 x i1> %cmp to <32 x i8> ++ store <32 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i16_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i16_icmp_sgt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvslt.h $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %cmp = icmp sgt <16 x i16> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i16> ++ store <16 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i32_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i32_icmp_sgt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvslt.w $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %cmp = icmp sgt <8 x i32> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i64_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i64_icmp_sgt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvslt.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %cmp = icmp sgt <4 x i64> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETUGT ++define void @v32i8_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v32i8_icmp_ugt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvslt.bu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %cmp = icmp ugt <32 x i8> %v0, %v1 ++ %ext = sext <32 x i1> %cmp to <32 x i8> ++ store <32 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i16_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i16_icmp_ugt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvslt.hu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %cmp = icmp ugt <16 x i16> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i16> ++ store <16 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i32_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i32_icmp_ugt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; 
CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvslt.wu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %cmp = icmp ugt <8 x i32> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i64_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i64_icmp_ugt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvslt.du $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %cmp = icmp ugt <4 x i64> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll +new file mode 100644 +index 000000000000..53fbf0b2f86f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll +@@ -0,0 +1,692 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++;; TREU ++define void @v4f32_fcmp_true(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_true: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vrepli.b $vr0, -1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp true <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++;; FALSE ++define void @v2f64_fcmp_false(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_false: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vrepli.b $vr0, 0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp false <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETOEQ ++define void @v4f32_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_oeq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.ceq.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp oeq <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_oeq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.ceq.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp oeq <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETUEQ ++define void @v4f32_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_ueq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cueq.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp ueq <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store 
<4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_ueq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cueq.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp ueq <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETEQ ++define void @v4f32_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_eq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.ceq.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp fast oeq <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_eq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.ceq.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp fast ueq <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETOLE ++define void @v4f32_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_ole: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cle.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp ole <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_ole: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cle.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp ole <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETULE ++define void @v4f32_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_ule: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cule.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp ule <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_ule: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cule.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp ule <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETLE 
++define void @v4f32_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_le: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cle.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp fast ole <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_le: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cle.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp fast ule <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETOLT ++define void @v4f32_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_olt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.clt.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp olt <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_olt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.clt.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp olt <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETULT ++define void @v4f32_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_ult: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cult.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp ult <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_ult: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cult.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp ult <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETLT ++define void @v4f32_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_lt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.clt.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp fast olt <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_lt(ptr %res, ptr %a0, ptr %a1) 
nounwind { ++; CHECK-LABEL: v2f64_fcmp_lt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.clt.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp fast ult <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETONE ++define void @v4f32_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_one: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cne.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp one <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_one: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cne.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp one <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETUNE ++define void @v4f32_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_une: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cune.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp une <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_une: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cune.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp une <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETNE ++define void @v4f32_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_ne: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cne.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp fast one <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_ne: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cne.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp fast une <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETO ++define void @v4f32_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_ord: ++; 
CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cor.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp ord <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_ord: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cor.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp ord <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETUO ++define void @v4f32_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_uno: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cun.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp uno <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_uno: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cun.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp uno <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETOGT ++define void @v4f32_fcmp_ogt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_ogt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vfcmp.clt.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp ogt <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_ogt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_ogt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vfcmp.clt.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp ogt <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETUGT ++define void @v4f32_fcmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_ugt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vfcmp.cult.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp ugt <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_ugt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; 
CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vfcmp.cult.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp ugt <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETGT ++define void @v4f32_fcmp_gt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_gt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vfcmp.clt.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp fast ogt <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_gt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_gt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vfcmp.clt.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp fast ugt <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETOGE ++define void @v4f32_fcmp_oge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_oge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vfcmp.cle.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp oge <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_oge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_oge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vfcmp.cle.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp oge <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETUGE ++define void @v4f32_fcmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_uge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vfcmp.cule.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp uge <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_uge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vfcmp.cule.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp uge <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETGE ++define void @v4f32_fcmp_ge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_ge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, 
$a2, 0 ++; CHECK-NEXT: vfcmp.cle.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp fast oge <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_ge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_ge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vfcmp.cle.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp fast uge <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll +new file mode 100644 +index 000000000000..448f3fa6c6e0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll +@@ -0,0 +1,939 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++;; SETEQ ++define void @v16i8_icmp_eq_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v16i8_icmp_eq_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vseqi.b $vr0, $vr0, 15 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i8>, ptr %a0 ++ %cmp = icmp eq <16 x i8> %v0, ++ %ext = sext <16 x i1> %cmp to <16 x i8> ++ store <16 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i8_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i8_icmp_eq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vseq.b $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %cmp = icmp eq <16 x i8> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i8> ++ store <16 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i16_icmp_eq_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v8i16_icmp_eq_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vseqi.h $vr0, $vr0, 15 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i16>, ptr %a0 ++ %cmp = icmp eq <8 x i16> %v0, ++ %ext = sext <8 x i1> %cmp to <8 x i16> ++ store <8 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i16_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i16_icmp_eq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vseq.h $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %cmp = icmp eq <8 x i16> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i16> ++ store <8 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i32_icmp_eq_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v4i32_icmp_eq_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vseqi.w $vr0, $vr0, 15 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %a0 ++ %cmp = icmp eq <4 x i32> %v0, ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i32_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i32_icmp_eq: ++; CHECK: # %bb.0: ++; 
CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vseq.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %cmp = icmp eq <4 x i32> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2i64_icmp_eq_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v2i64_icmp_eq_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vseqi.d $vr0, $vr0, 15 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %a0 ++ %cmp = icmp eq <2 x i64> %v0, ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++define void @v2i64_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2i64_icmp_eq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vseq.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %cmp = icmp eq <2 x i64> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETLE ++define void @v16i8_icmp_sle_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v16i8_icmp_sle_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslei.b $vr0, $vr0, 15 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i8>, ptr %a0 ++ %cmp = icmp sle <16 x i8> %v0, ++ %ext = sext <16 x i1> %cmp to <16 x i8> ++ store <16 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i8_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i8_icmp_sle: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsle.b $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %cmp = icmp sle <16 x i8> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i8> ++ store <16 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i16_icmp_sle_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v8i16_icmp_sle_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslei.h $vr0, $vr0, 15 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i16>, ptr %a0 ++ %cmp = icmp sle <8 x i16> %v0, ++ %ext = sext <8 x i1> %cmp to <8 x i16> ++ store <8 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i16_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i16_icmp_sle: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsle.h $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %cmp = icmp sle <8 x i16> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i16> ++ store <8 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i32_icmp_sle_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v4i32_icmp_sle_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslei.w $vr0, $vr0, 15 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %a0 ++ %cmp = icmp sle <4 x i32> %v0, ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i32_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i32_icmp_sle: ++; CHECK: # %bb.0: 
++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsle.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %cmp = icmp sle <4 x i32> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2i64_icmp_sle_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v2i64_icmp_sle_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslei.d $vr0, $vr0, 15 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %a0 ++ %cmp = icmp sle <2 x i64> %v0, ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++define void @v2i64_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2i64_icmp_sle: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsle.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %cmp = icmp sle <2 x i64> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETULE ++define void @v16i8_icmp_ule_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v16i8_icmp_ule_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslei.bu $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i8>, ptr %a0 ++ %cmp = icmp ule <16 x i8> %v0, ++ %ext = sext <16 x i1> %cmp to <16 x i8> ++ store <16 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i8_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i8_icmp_ule: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsle.bu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %cmp = icmp ule <16 x i8> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i8> ++ store <16 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i16_icmp_ule_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v8i16_icmp_ule_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslei.hu $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i16>, ptr %a0 ++ %cmp = icmp ule <8 x i16> %v0, ++ %ext = sext <8 x i1> %cmp to <8 x i16> ++ store <8 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i16_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i16_icmp_ule: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsle.hu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %cmp = icmp ule <8 x i16> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i16> ++ store <8 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i32_icmp_ule_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v4i32_icmp_ule_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslei.wu $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %a0 ++ %cmp = icmp ule <4 x i32> %v0, ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i32_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i32_icmp_ule: ++; 
CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsle.wu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %cmp = icmp ule <4 x i32> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2i64_icmp_ule_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v2i64_icmp_ule_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslei.du $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %a0 ++ %cmp = icmp ule <2 x i64> %v0, ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++define void @v2i64_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2i64_icmp_ule: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsle.du $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %cmp = icmp ule <2 x i64> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETLT ++define void @v16i8_icmp_slt_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v16i8_icmp_slt_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslti.b $vr0, $vr0, 15 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i8>, ptr %a0 ++ %cmp = icmp slt <16 x i8> %v0, ++ %ext = sext <16 x i1> %cmp to <16 x i8> ++ store <16 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i8_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i8_icmp_slt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vslt.b $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %cmp = icmp slt <16 x i8> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i8> ++ store <16 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i16_icmp_slt_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v8i16_icmp_slt_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslti.h $vr0, $vr0, 15 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i16>, ptr %a0 ++ %cmp = icmp slt <8 x i16> %v0, ++ %ext = sext <8 x i1> %cmp to <8 x i16> ++ store <8 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i16_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i16_icmp_slt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vslt.h $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %cmp = icmp slt <8 x i16> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i16> ++ store <8 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i32_icmp_slt_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v4i32_icmp_slt_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslti.w $vr0, $vr0, 15 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %a0 ++ %cmp = icmp slt <4 x i32> %v0, ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i32_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: 
v4i32_icmp_slt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vslt.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %cmp = icmp slt <4 x i32> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2i64_icmp_slt_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v2i64_icmp_slt_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslti.d $vr0, $vr0, 15 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %a0 ++ %cmp = icmp slt <2 x i64> %v0, ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++define void @v2i64_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2i64_icmp_slt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vslt.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %cmp = icmp slt <2 x i64> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETULT ++define void @v16i8_icmp_ult_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v16i8_icmp_ult_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslti.bu $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i8>, ptr %a0 ++ %cmp = icmp ult <16 x i8> %v0, ++ %ext = sext <16 x i1> %cmp to <16 x i8> ++ store <16 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i8_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i8_icmp_ult: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vslt.bu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %cmp = icmp ult <16 x i8> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i8> ++ store <16 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i16_icmp_ult_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v8i16_icmp_ult_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslti.hu $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i16>, ptr %a0 ++ %cmp = icmp ult <8 x i16> %v0, ++ %ext = sext <8 x i1> %cmp to <8 x i16> ++ store <8 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i16_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i16_icmp_ult: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vslt.hu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %cmp = icmp ult <8 x i16> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i16> ++ store <8 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i32_icmp_ult_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v4i32_icmp_ult_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslti.wu $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %a0 ++ %cmp = icmp ult <4 x i32> %v0, ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i32_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { 
++; CHECK-LABEL: v4i32_icmp_ult: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vslt.wu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %cmp = icmp ult <4 x i32> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2i64_icmp_ult_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v2i64_icmp_ult_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslti.du $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %a0 ++ %cmp = icmp ult <2 x i64> %v0, ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++define void @v2i64_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2i64_icmp_ult: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vslt.du $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %cmp = icmp ult <2 x i64> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETNE ++define void @v16i8_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i8_icmp_ne: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vseq.b $vr0, $vr1, $vr0 ++; CHECK-NEXT: vxori.b $vr0, $vr0, 255 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %cmp = icmp ne <16 x i8> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i8> ++ store <16 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i16_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i16_icmp_ne: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vseq.h $vr0, $vr1, $vr0 ++; CHECK-NEXT: vrepli.b $vr1, -1 ++; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %cmp = icmp ne <8 x i16> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i16> ++ store <8 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i32_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i32_icmp_ne: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vseq.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: vrepli.b $vr1, -1 ++; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %cmp = icmp ne <4 x i32> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2i64_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2i64_icmp_ne: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vseq.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vrepli.b $vr1, -1 ++; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %cmp = icmp ne <2 x i64> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETGE ++define void @v16i8_icmp_sge(ptr %res, ptr 
%a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i8_icmp_sge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vsle.b $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %cmp = icmp sge <16 x i8> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i8> ++ store <16 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i16_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i16_icmp_sge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vsle.h $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %cmp = icmp sge <8 x i16> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i16> ++ store <8 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i32_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i32_icmp_sge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vsle.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %cmp = icmp sge <4 x i32> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2i64_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2i64_icmp_sge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vsle.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %cmp = icmp sge <2 x i64> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETUGE ++define void @v16i8_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i8_icmp_uge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vsle.bu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %cmp = icmp uge <16 x i8> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i8> ++ store <16 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i16_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i16_icmp_uge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vsle.hu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %cmp = icmp uge <8 x i16> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i16> ++ store <8 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i32_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i32_icmp_uge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vsle.wu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %cmp = icmp uge <4 x i32> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2i64_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2i64_icmp_uge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vsle.du 
$vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %cmp = icmp uge <2 x i64> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETGT ++define void @v16i8_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i8_icmp_sgt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vslt.b $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %cmp = icmp sgt <16 x i8> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i8> ++ store <16 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i16_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i16_icmp_sgt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vslt.h $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %cmp = icmp sgt <8 x i16> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i16> ++ store <8 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i32_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i32_icmp_sgt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vslt.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %cmp = icmp sgt <4 x i32> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2i64_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2i64_icmp_sgt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vslt.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %cmp = icmp sgt <2 x i64> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETUGT ++define void @v16i8_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i8_icmp_ugt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vslt.bu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %cmp = icmp ugt <16 x i8> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i8> ++ store <16 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i16_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i16_icmp_ugt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vslt.hu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %cmp = icmp ugt <8 x i16> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i16> ++ store <8 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i32_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i32_icmp_ugt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vslt.wu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %cmp = icmp 
ugt <4 x i32> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2i64_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2i64_icmp_ugt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vslt.du $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %cmp = icmp ugt <2 x i64> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} +-- +2.20.1 + diff --git a/0030-LoongArch-Make-ISD-FSQRT-a-legal-operation-with-lsx-.patch b/0030-LoongArch-Make-ISD-FSQRT-a-legal-operation-with-lsx-.patch new file mode 100644 index 0000000..edd0691 --- /dev/null +++ b/0030-LoongArch-Make-ISD-FSQRT-a-legal-operation-with-lsx-.patch @@ -0,0 +1,373 @@ +From 1cb397dd6c4dc53e44d3f61906c6464a52342c73 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Fri, 8 Dec 2023 14:16:26 +0800 +Subject: [PATCH 30/42] [LoongArch] Make ISD::FSQRT a legal operation with + lsx/lasx feature (#74795) + +And add some patterns: +1. (fdiv 1.0, vector) +2. (fdiv 1.0, (fsqrt vector)) + +(cherry picked from commit 9f70e708a7d3fce97d63b626520351501455fca0) + +--- + .../LoongArch/LoongArchISelLowering.cpp | 2 + + .../LoongArch/LoongArchLASXInstrInfo.td | 22 +++++++ + .../Target/LoongArch/LoongArchLSXInstrInfo.td | 45 +++++++++++++ + llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll | 65 +++++++++++++++++++ + .../LoongArch/lasx/ir-instruction/fdiv.ll | 29 +++++++++ + llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll | 65 +++++++++++++++++++ + .../LoongArch/lsx/ir-instruction/fdiv.ll | 29 +++++++++ + 7 files changed, 257 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index 3d5ae6d3deda..8c54c7cf2cab 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -260,6 +260,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); + setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); + setOperationAction(ISD::FMA, VT, Legal); ++ setOperationAction(ISD::FSQRT, VT, Legal); + setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, + ISD::SETUGE, ISD::SETUGT}, + VT, Expand); +@@ -300,6 +301,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); + setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); + setOperationAction(ISD::FMA, VT, Legal); ++ setOperationAction(ISD::FSQRT, VT, Legal); + setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, + ISD::SETUGE, ISD::SETUGT}, + VT, Expand); +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index a9bf65c6840d..55b90f4450c0 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -1092,6 +1092,13 @@ multiclass PatXr { + (!cast(Inst#"_D") LASX256:$xj)>; + } + ++multiclass PatXrF { ++ def : Pat<(v8f32 (OpNode (v8f32 LASX256:$xj))), ++ (!cast(Inst#"_S") LASX256:$xj)>; ++ def : Pat<(v4f64 (OpNode (v4f64 LASX256:$xj))), ++ (!cast(Inst#"_D") LASX256:$xj)>; ++} ++ + 
multiclass PatXrXr { + def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), + (!cast(Inst#"_B") LASX256:$xj, LASX256:$xk)>; +@@ -1448,6 +1455,21 @@ def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa), + def : Pat<(fma v4f64:$xj, v4f64:$xk, v4f64:$xa), + (XVFMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; + ++// XVFSQRT_{S/D} ++defm : PatXrF; ++ ++// XVRECIP_{S/D} ++def : Pat<(fdiv vsplatf32_fpimm_eq_1, v8f32:$xj), ++ (XVFRECIP_S v8f32:$xj)>; ++def : Pat<(fdiv vsplatf64_fpimm_eq_1, v4f64:$xj), ++ (XVFRECIP_D v4f64:$xj)>; ++ ++// XVFRSQRT_{S/D} ++def : Pat<(fdiv vsplatf32_fpimm_eq_1, (fsqrt v8f32:$xj)), ++ (XVFRSQRT_S v8f32:$xj)>; ++def : Pat<(fdiv vsplatf64_fpimm_eq_1, (fsqrt v4f64:$xj)), ++ (XVFRSQRT_D v4f64:$xj)>; ++ + // XVSEQ[I]_{B/H/W/D} + defm : PatCCXrSimm5; + defm : PatCCXrXr; +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index ff21c6681271..8ad0c5904f25 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -95,6 +95,29 @@ def vsplati64_imm_eq_63 : PatFrags<(ops), [(build_vector), + Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 63; + }]>; + ++def vsplatf32_fpimm_eq_1 ++ : PatFrags<(ops), [(bitconvert (v4i32 (build_vector))), ++ (bitconvert (v8i32 (build_vector)))], [{ ++ APInt Imm; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ N = N->getOperand(0).getNode(); ++ ++ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && ++ Imm.getBitWidth() == EltTy.getSizeInBits() && ++ Imm == APFloat(+1.0f).bitcastToAPInt(); ++}]>; ++def vsplatf64_fpimm_eq_1 ++ : PatFrags<(ops), [(bitconvert (v2i64 (build_vector))), ++ (bitconvert (v4i64 (build_vector)))], [{ ++ APInt Imm; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ N = N->getOperand(0).getNode(); ++ ++ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && ++ Imm.getBitWidth() == EltTy.getSizeInBits() && ++ Imm == APFloat(+1.0).bitcastToAPInt(); ++}]>; ++ + def vsplati8imm7 : PatFrag<(ops node:$reg), + (and node:$reg, vsplati8_imm_eq_7)>; + def vsplati16imm15 : PatFrag<(ops node:$reg), +@@ -1173,6 +1196,13 @@ multiclass PatVr { + (!cast(Inst#"_D") LSX128:$vj)>; + } + ++multiclass PatVrF { ++ def : Pat<(v4f32 (OpNode (v4f32 LSX128:$vj))), ++ (!cast(Inst#"_S") LSX128:$vj)>; ++ def : Pat<(v2f64 (OpNode (v2f64 LSX128:$vj))), ++ (!cast(Inst#"_D") LSX128:$vj)>; ++} ++ + multiclass PatVrVr { + def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), + (!cast(Inst#"_B") LSX128:$vj, LSX128:$vk)>; +@@ -1525,6 +1555,21 @@ def : Pat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va), + def : Pat<(fma v2f64:$vj, v2f64:$vk, v2f64:$va), + (VFMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; + ++// VFSQRT_{S/D} ++defm : PatVrF; ++ ++// VFRECIP_{S/D} ++def : Pat<(fdiv vsplatf32_fpimm_eq_1, v4f32:$vj), ++ (VFRECIP_S v4f32:$vj)>; ++def : Pat<(fdiv vsplatf64_fpimm_eq_1, v2f64:$vj), ++ (VFRECIP_D v2f64:$vj)>; ++ ++// VFRSQRT_{S/D} ++def : Pat<(fdiv vsplatf32_fpimm_eq_1, (fsqrt v4f32:$vj)), ++ (VFRSQRT_S v4f32:$vj)>; ++def : Pat<(fdiv vsplatf64_fpimm_eq_1, (fsqrt v2f64:$vj)), ++ (VFRSQRT_D v2f64:$vj)>; ++ + // VSEQ[I]_{B/H/W/D} + defm : PatCCVrSimm5; + defm : PatCCVrVr; +diff --git a/llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll b/llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll +new file mode 100644 +index 000000000000..c4a881bdeae9 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll +@@ -0,0 +1,65 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc 
--mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++;; fsqrt ++define void @sqrt_v8f32(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sqrt_v8f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvfsqrt.s $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0, align 16 ++ %sqrt = call <8 x float> @llvm.sqrt.v8f32 (<8 x float> %v0) ++ store <8 x float> %sqrt, ptr %res, align 16 ++ ret void ++} ++ ++define void @sqrt_v4f64(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sqrt_v4f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvfsqrt.d $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0, align 16 ++ %sqrt = call <4 x double> @llvm.sqrt.v4f64 (<4 x double> %v0) ++ store <4 x double> %sqrt, ptr %res, align 16 ++ ret void ++} ++ ++;; 1.0 / (fsqrt vec) ++define void @one_div_sqrt_v8f32(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: one_div_sqrt_v8f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvfrsqrt.s $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0, align 16 ++ %sqrt = call <8 x float> @llvm.sqrt.v8f32 (<8 x float> %v0) ++ %div = fdiv <8 x float> , %sqrt ++ store <8 x float> %div, ptr %res, align 16 ++ ret void ++} ++ ++define void @one_div_sqrt_v4f64(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: one_div_sqrt_v4f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvfrsqrt.d $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0, align 16 ++ %sqrt = call <4 x double> @llvm.sqrt.v4f64 (<4 x double> %v0) ++ %div = fdiv <4 x double> , %sqrt ++ store <4 x double> %div, ptr %res, align 16 ++ ret void ++} ++ ++declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) ++declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll +index 284121a79a49..6004565b0b78 100644 +--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll +@@ -32,3 +32,32 @@ entry: + store <4 x double> %v2, ptr %res + ret void + } ++ ++;; 1.0 / vec ++define void @one_fdiv_v8f32(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: one_fdiv_v8f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvfrecip.s $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %div = fdiv <8 x float> , %v0 ++ store <8 x float> %div, ptr %res ++ ret void ++} ++ ++define void @one_fdiv_v4f64(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: one_fdiv_v4f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvfrecip.d $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %div = fdiv <4 x double> , %v0 ++ store <4 x double> %div, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll b/llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll +new file mode 100644 +index 000000000000..a57bc1ca0e94 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll +@@ -0,0 +1,65 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++;; fsqrt ++define 
void @sqrt_v4f32(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sqrt_v4f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vfsqrt.s $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0, align 16 ++ %sqrt = call <4 x float> @llvm.sqrt.v4f32 (<4 x float> %v0) ++ store <4 x float> %sqrt, ptr %res, align 16 ++ ret void ++} ++ ++define void @sqrt_v2f64(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sqrt_v2f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vfsqrt.d $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0, align 16 ++ %sqrt = call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %v0) ++ store <2 x double> %sqrt, ptr %res, align 16 ++ ret void ++} ++ ++;; 1.0 / (fsqrt vec) ++define void @one_div_sqrt_v4f32(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: one_div_sqrt_v4f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vfrsqrt.s $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0, align 16 ++ %sqrt = call <4 x float> @llvm.sqrt.v4f32 (<4 x float> %v0) ++ %div = fdiv <4 x float> , %sqrt ++ store <4 x float> %div, ptr %res, align 16 ++ ret void ++} ++ ++define void @one_div_sqrt_v2f64(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: one_div_sqrt_v2f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vfrsqrt.d $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0, align 16 ++ %sqrt = call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %v0) ++ %div = fdiv <2 x double> , %sqrt ++ store <2 x double> %div, ptr %res, align 16 ++ ret void ++} ++ ++declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) ++declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll +index eb7c8bd9616e..5f1ee9e4d212 100644 +--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll +@@ -32,3 +32,32 @@ entry: + store <2 x double> %v2, ptr %res + ret void + } ++ ++;; 1.0 / vec ++define void @one_fdiv_v4f32(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: one_fdiv_v4f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vfrecip.s $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %div = fdiv <4 x float> , %v0 ++ store <4 x float> %div, ptr %res ++ ret void ++} ++ ++define void @one_fdiv_v2f64(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: one_fdiv_v2f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vfrecip.d $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %div = fdiv <2 x double> , %v0 ++ store <2 x double> %div, ptr %res ++ ret void ++} +-- +2.20.1 + diff --git a/0031-LoongArch-Mark-ISD-FNEG-as-legal.patch b/0031-LoongArch-Mark-ISD-FNEG-as-legal.patch new file mode 100644 index 0000000..38be681 --- /dev/null +++ b/0031-LoongArch-Mark-ISD-FNEG-as-legal.patch @@ -0,0 +1,140 @@ +From db8e4fd0222d4c3fb62d4ebf1ac19df1f18902df Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Fri, 8 Dec 2023 14:21:10 +0800 +Subject: [PATCH 31/42] [LoongArch] Mark ISD::FNEG as legal + +(cherry picked from commit cdc37325669c0321328a7245083c427b229e79e9) + +--- 
+ .../LoongArch/LoongArchISelLowering.cpp | 2 ++ + .../LoongArch/LoongArchLASXInstrInfo.td | 4 +++ + .../Target/LoongArch/LoongArchLSXInstrInfo.td | 4 +++ + .../LoongArch/lasx/ir-instruction/fneg.ll | 29 +++++++++++++++++++ + .../LoongArch/lsx/ir-instruction/fneg.ll | 29 +++++++++++++++++++ + 5 files changed, 68 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fneg.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fneg.ll + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index 8c54c7cf2cab..c7f4b1d24f07 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -261,6 +261,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); + setOperationAction(ISD::FMA, VT, Legal); + setOperationAction(ISD::FSQRT, VT, Legal); ++ setOperationAction(ISD::FNEG, VT, Legal); + setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, + ISD::SETUGE, ISD::SETUGT}, + VT, Expand); +@@ -302,6 +303,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); + setOperationAction(ISD::FMA, VT, Legal); + setOperationAction(ISD::FSQRT, VT, Legal); ++ setOperationAction(ISD::FNEG, VT, Legal); + setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, + ISD::SETUGE, ISD::SETUGT}, + VT, Expand); +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index 55b90f4450c0..8559baa0e525 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -1605,6 +1605,10 @@ foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in + def : Pat<(vt (vselect LASX256:$xa, LASX256:$xk, LASX256:$xj)), + (XVBITSEL_V LASX256:$xj, LASX256:$xk, LASX256:$xa)>; + ++// fneg ++def : Pat<(fneg (v8f32 LASX256:$xj)), (XVBITREVI_W LASX256:$xj, 31)>; ++def : Pat<(fneg (v4f64 LASX256:$xj)), (XVBITREVI_D LASX256:$xj, 63)>; ++ + } // Predicates = [HasExtLASX] + + /// Intrinsic pattern +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index 8ad0c5904f25..5947f241bb59 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -1712,6 +1712,10 @@ foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in + def : Pat<(vt (vselect LSX128:$va, LSX128:$vk, LSX128:$vj)), + (VBITSEL_V LSX128:$vj, LSX128:$vk, LSX128:$va)>; + ++// fneg ++def : Pat<(fneg (v4f32 LSX128:$vj)), (VBITREVI_W LSX128:$vj, 31)>; ++def : Pat<(fneg (v2f64 LSX128:$vj)), (VBITREVI_D LSX128:$vj, 63)>; ++ + } // Predicates = [HasExtLSX] + + /// Intrinsic pattern +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fneg.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fneg.ll +new file mode 100644 +index 000000000000..5eb468fc55a0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fneg.ll +@@ -0,0 +1,29 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @fneg_v8f32(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: fneg_v8f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvbitrevi.w 
$xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = fneg <8 x float> %v0 ++ store <8 x float> %v1, ptr %res ++ ret void ++} ++define void @fneg_v4f64(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: fneg_v4f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvbitrevi.d $xr0, $xr0, 63 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = fneg <4 x double> %v0 ++ store <4 x double> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fneg.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fneg.ll +new file mode 100644 +index 000000000000..795c1ac8b368 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fneg.ll +@@ -0,0 +1,29 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @fneg_v4f32(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: fneg_v4f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vbitrevi.w $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = fneg <4 x float> %v0 ++ store <4 x float> %v1, ptr %res ++ ret void ++} ++define void @fneg_v2f64(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: fneg_v2f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vbitrevi.d $vr0, $vr0, 63 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = fneg <2 x double> %v0 ++ store <2 x double> %v1, ptr %res ++ ret void ++} +-- +2.20.1 + diff --git a/0032-LoongArch-Add-codegen-support-for-X-VF-MSUB-NMADD-NM.patch b/0032-LoongArch-Add-codegen-support-for-X-VF-MSUB-NMADD-NM.patch new file mode 100644 index 0000000..6b19757 --- /dev/null +++ b/0032-LoongArch-Add-codegen-support-for-X-VF-MSUB-NMADD-NM.patch @@ -0,0 +1,3341 @@ +From 3cfbf955ff5b4a50785d3e8e29abcb73fed7e747 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Mon, 11 Dec 2023 10:37:22 +0800 +Subject: [PATCH 32/42] [LoongArch] Add codegen support for + [X]VF{MSUB/NMADD/NMSUB}.{S/D} instructions (#74819) + +This is similar to single and double-precision floating-point +instructions. 
+ +(cherry picked from commit af999c4be9f5643724c6f379690ecee4346b2b48) + +--- + .../LoongArch/LoongArchLASXInstrInfo.td | 26 + + .../Target/LoongArch/LoongArchLSXInstrInfo.td | 26 + + llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll | 804 ++++++++++++++++++ + llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll | 804 ++++++++++++++++++ + llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll | 804 ++++++++++++++++++ + llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll | 804 ++++++++++++++++++ + 6 files changed, 3268 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll + +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index 8559baa0e525..ec6983d0f487 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -1455,6 +1455,32 @@ def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa), + def : Pat<(fma v4f64:$xj, v4f64:$xk, v4f64:$xa), + (XVFMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; + ++// XVFMSUB_{S/D} ++def : Pat<(fma v8f32:$xj, v8f32:$xk, (fneg v8f32:$xa)), ++ (XVFMSUB_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; ++def : Pat<(fma v4f64:$xj, v4f64:$xk, (fneg v4f64:$xa)), ++ (XVFMSUB_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; ++ ++// XVFNMADD_{S/D} ++def : Pat<(fneg (fma v8f32:$xj, v8f32:$xk, v8f32:$xa)), ++ (XVFNMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; ++def : Pat<(fneg (fma v4f64:$xj, v4f64:$xk, v4f64:$xa)), ++ (XVFNMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; ++def : Pat<(fma_nsz (fneg v8f32:$xj), v8f32:$xk, (fneg v8f32:$xa)), ++ (XVFNMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; ++def : Pat<(fma_nsz (fneg v4f64:$xj), v4f64:$xk, (fneg v4f64:$xa)), ++ (XVFNMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; ++ ++// XVFNMSUB_{S/D} ++def : Pat<(fneg (fma v8f32:$xj, v8f32:$xk, (fneg v8f32:$xa))), ++ (XVFNMSUB_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; ++def : Pat<(fneg (fma v4f64:$xj, v4f64:$xk, (fneg v4f64:$xa))), ++ (XVFNMSUB_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; ++def : Pat<(fma_nsz (fneg v8f32:$xj), v8f32:$xk, v8f32:$xa), ++ (XVFNMSUB_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; ++def : Pat<(fma_nsz (fneg v4f64:$xj), v4f64:$xk, v4f64:$xa), ++ (XVFNMSUB_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; ++ + // XVFSQRT_{S/D} + defm : PatXrF; + +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index 5947f241bb59..e468176885d7 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -1555,6 +1555,32 @@ def : Pat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va), + def : Pat<(fma v2f64:$vj, v2f64:$vk, v2f64:$va), + (VFMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; + ++// VFMSUB_{S/D} ++def : Pat<(fma v4f32:$vj, v4f32:$vk, (fneg v4f32:$va)), ++ (VFMSUB_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; ++def : Pat<(fma v2f64:$vj, v2f64:$vk, (fneg v2f64:$va)), ++ (VFMSUB_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; ++ ++// VFNMADD_{S/D} ++def : Pat<(fneg (fma v4f32:$vj, v4f32:$vk, v4f32:$va)), ++ (VFNMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; ++def : Pat<(fneg (fma v2f64:$vj, v2f64:$vk, v2f64:$va)), ++ (VFNMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; ++def : Pat<(fma_nsz (fneg v4f32:$vj), v4f32:$vk, (fneg v4f32:$va)), ++ (VFNMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; ++def : Pat<(fma_nsz (fneg v2f64:$vj), v2f64:$vk, (fneg 
v2f64:$va)), ++ (VFNMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; ++ ++// VFNMSUB_{S/D} ++def : Pat<(fneg (fma v4f32:$vj, v4f32:$vk, (fneg v4f32:$va))), ++ (VFNMSUB_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; ++def : Pat<(fneg (fma v2f64:$vj, v2f64:$vk, (fneg v2f64:$va))), ++ (VFNMSUB_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; ++def : Pat<(fma_nsz (fneg v4f32:$vj), v4f32:$vk, v4f32:$va), ++ (VFNMSUB_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; ++def : Pat<(fma_nsz (fneg v2f64:$vj), v2f64:$vk, v2f64:$va), ++ (VFNMSUB_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; ++ + // VFSQRT_{S/D} + defm : PatVrF; + +diff --git a/llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll b/llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll +new file mode 100644 +index 000000000000..af18c52b096c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll +@@ -0,0 +1,804 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=fast < %s \ ++; RUN: | FileCheck %s --check-prefix=CONTRACT-FAST ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=on < %s \ ++; RUN: | FileCheck %s --check-prefix=CONTRACT-ON ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=off < %s \ ++; RUN: | FileCheck %s --check-prefix=CONTRACT-OFF ++ ++define void @xvfmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfmadd_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfmadd_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-ON-NEXT: xvfadd.d $xr0, $xr0, $xr1 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfmadd_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-OFF-NEXT: xvfadd.d $xr0, $xr0, $xr1 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %mul = fmul<4 x double> %v0, %v1 ++ %add = fadd<4 x double> %mul, %v2 ++ store <4 x double> %add, ptr %res ++ ret void ++} ++ ++define void @xvfmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfmsub_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfmsub_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr1 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfmsub_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, 
$a1, 0 ++; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr1 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %mul = fmul<4 x double> %v0, %v1 ++ %sub = fsub<4 x double> %mul, %v2 ++ store <4 x double> %sub, ptr %res ++ ret void ++} ++ ++define void @xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfnmadd_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfnmadd_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-ON-NEXT: xvfadd.d $xr0, $xr0, $xr1 ++; CONTRACT-ON-NEXT: xvbitrevi.d $xr0, $xr0, 63 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfnmadd_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-OFF-NEXT: xvfadd.d $xr0, $xr0, $xr1 ++; CONTRACT-OFF-NEXT: xvbitrevi.d $xr0, $xr0, 63 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %mul = fmul<4 x double> %v0, %v1 ++ %add = fadd<4 x double> %mul, %v2 ++ %negadd = fneg<4 x double> %add ++ store <4 x double> %negadd, ptr %res ++ ret void ++} ++ ++define void @xvfnmadd_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfnmadd_d_nsz: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfnmadd_d_nsz: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-ON-NEXT: xvbitrevi.d $xr1, $xr1, 63 ++; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr1 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfnmadd_d_nsz: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-OFF-NEXT: xvbitrevi.d $xr1, $xr1, 63 ++; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr1 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %negv0 = fneg nsz<4 x double> %v0 ++ %negv2 = fneg nsz<4 x double> %v2 ++ %mul = fmul nsz<4 x double> %negv0, %v1 ++ %add = fadd nsz<4 x double> %mul, %negv2 ++ store <4 x double> %add, ptr %res ++ ret void ++} ++ ++;; Check that 
xvfnmadd.d is not emitted. ++define void @not_xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: not_xvfnmadd_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvbitrevi.d $xr2, $xr2, 63 ++; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: not_xvfnmadd_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-ON-NEXT: xvbitrevi.d $xr1, $xr1, 63 ++; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr1 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: not_xvfnmadd_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-OFF-NEXT: xvbitrevi.d $xr1, $xr1, 63 ++; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr1 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %negv0 = fneg<4 x double> %v0 ++ %negv2 = fneg<4 x double> %v2 ++ %mul = fmul<4 x double> %negv0, %v1 ++ %add = fadd<4 x double> %mul, %negv2 ++ store <4 x double> %add, ptr %res ++ ret void ++} ++ ++define void @xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfnmsub_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfnmsub_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr1 ++; CONTRACT-ON-NEXT: xvbitrevi.d $xr0, $xr0, 63 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfnmsub_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr1 ++; CONTRACT-OFF-NEXT: xvbitrevi.d $xr0, $xr0, 63 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %negv2 = fneg<4 x double> %v2 ++ %mul = fmul<4 x double> %v0, %v1 ++ %add = fadd<4 x double> %mul, %negv2 ++ %neg = fneg<4 x double> %add ++ store <4 x double> %neg, ptr %res ++ ret void ++} ++ ++define void @xvfnmsub_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfnmsub_d_nsz: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; 
CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfnmsub_d_nsz: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfnmsub_d_nsz: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %negv0 = fneg nsz<4 x double> %v0 ++ %mul = fmul nsz<4 x double> %negv0, %v1 ++ %add = fadd nsz<4 x double> %mul, %v2 ++ store <4 x double> %add, ptr %res ++ ret void ++} ++ ++;; Check that xvfnmsub.d is not emitted. ++define void @not_xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: not_xvfnmsub_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvbitrevi.d $xr2, $xr2, 63 ++; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: not_xvfnmsub_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: not_xvfnmsub_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %negv0 = fneg<4 x double> %v0 ++ %mul = fmul<4 x double> %negv0, %v1 ++ %add = fadd<4 x double> %mul, %v2 ++ store <4 x double> %add, ptr %res ++ ret void ++} ++ ++define void @contract_xvfmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_xvfmadd_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_xvfmadd_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_xvfmadd_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmadd.d $xr0, $xr2, 
$xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %mul = fmul contract <4 x double> %v0, %v1 ++ %add = fadd contract <4 x double> %mul, %v2 ++ store <4 x double> %add, ptr %res ++ ret void ++} ++ ++define void @contract_xvfmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_xvfmsub_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_xvfmsub_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_xvfmsub_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %mul = fmul contract <4 x double> %v0, %v1 ++ %sub = fsub contract <4 x double> %mul, %v2 ++ store <4 x double> %sub, ptr %res ++ ret void ++} ++ ++define void @contract_xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_xvfnmadd_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_xvfnmadd_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_xvfnmadd_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %mul = fmul contract <4 x double> %v0, %v1 ++ %add = fadd contract <4 x double> %mul, %v2 ++ %negadd = fneg contract <4 x double> %add ++ store <4 x double> %negadd, ptr %res ++ ret void ++} ++ ++define void @contract_xvfnmadd_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_xvfnmadd_d_nsz: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_xvfnmadd_d_nsz: 
++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_xvfnmadd_d_nsz: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %negv0 = fneg contract nsz<4 x double> %v0 ++ %negv2 = fneg contract nsz<4 x double> %v2 ++ %mul = fmul contract nsz<4 x double> %negv0, %v1 ++ %add = fadd contract nsz<4 x double> %mul, %negv2 ++ store <4 x double> %add, ptr %res ++ ret void ++} ++ ++;; Check that xvfnmadd.d is not emitted. ++define void @not_contract_xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: not_contract_xvfnmadd_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvbitrevi.d $xr2, $xr2, 63 ++; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: not_contract_xvfnmadd_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvbitrevi.d $xr2, $xr2, 63 ++; CONTRACT-ON-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: not_contract_xvfnmadd_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvbitrevi.d $xr2, $xr2, 63 ++; CONTRACT-OFF-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %negv0 = fneg contract <4 x double> %v0 ++ %negv2 = fneg contract <4 x double> %v2 ++ %mul = fmul contract <4 x double> %negv0, %v1 ++ %add = fadd contract <4 x double> %mul, %negv2 ++ store <4 x double> %add, ptr %res ++ ret void ++} ++ ++define void @contract_xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_xvfnmsub_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_xvfnmsub_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_xvfnmsub_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; 
CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %negv2 = fneg contract <4 x double> %v2 ++ %mul = fmul contract <4 x double> %v0, %v1 ++ %add = fadd contract <4 x double> %mul, %negv2 ++ %neg = fneg contract <4 x double> %add ++ store <4 x double> %neg, ptr %res ++ ret void ++} ++ ++define void @contract_xvfnmsub_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_xvfnmsub_d_nsz: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_xvfnmsub_d_nsz: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_xvfnmsub_d_nsz: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %negv0 = fneg contract nsz<4 x double> %v0 ++ %mul = fmul contract nsz<4 x double> %negv0, %v1 ++ %add = fadd contract nsz<4 x double> %mul, %v2 ++ store <4 x double> %add, ptr %res ++ ret void ++} ++ ++;; Check that xvfnmsub.d is not emitted. 
++define void @not_contract_xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: not_contract_xvfnmsub_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvbitrevi.d $xr2, $xr2, 63 ++; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: not_contract_xvfnmsub_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvbitrevi.d $xr2, $xr2, 63 ++; CONTRACT-ON-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: not_contract_xvfnmsub_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvbitrevi.d $xr2, $xr2, 63 ++; CONTRACT-OFF-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %negv0 = fneg contract <4 x double> %v0 ++ %mul = fmul contract <4 x double> %negv0, %v1 ++ %add = fadd contract <4 x double> %mul, %v2 ++ store <4 x double> %add, ptr %res ++ ret void ++} ++ ++define void @xvfmadd_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfmadd_d_contract: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfmadd_d_contract: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfmadd_d_contract: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %mul = fmul contract <4 x double> %v0, %v1 ++ %add = fadd contract <4 x double> %mul, %v2 ++ store <4 x double> %add, ptr %res ++ ret void ++} ++ ++define void @xvfmsub_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfmsub_d_contract: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfmsub_d_contract: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmsub.d $xr0, $xr2, 
$xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfmsub_d_contract: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %mul = fmul contract <4 x double> %v0, %v1 ++ %sub = fsub contract <4 x double> %mul, %v2 ++ store <4 x double> %sub, ptr %res ++ ret void ++} ++ ++define void @xvfnmadd_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfnmadd_d_contract: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfnmadd_d_contract: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfnmadd_d_contract: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %mul = fmul contract <4 x double> %v0, %v1 ++ %add = fadd contract <4 x double> %mul, %v2 ++ %negadd = fneg contract <4 x double> %add ++ store <4 x double> %negadd, ptr %res ++ ret void ++} ++ ++define void @xvfnmsub_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfnmsub_d_contract: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfnmsub_d_contract: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfnmsub_d_contract: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %mul = fmul contract <4 x double> %v0, %v1 ++ %negv2 = fneg contract <4 x double> %v2 ++ %add = fadd contract <4 x double> %negv2, %mul ++ %negadd = fneg contract <4 x double> %add ++ store <4 x double> %negadd, ptr %res ++ ret void ++} +diff --git 
a/llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll b/llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll +new file mode 100644 +index 000000000000..b7b3cb3a2e66 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll +@@ -0,0 +1,804 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=fast < %s \ ++; RUN: | FileCheck %s --check-prefix=CONTRACT-FAST ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=on < %s \ ++; RUN: | FileCheck %s --check-prefix=CONTRACT-ON ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=off < %s \ ++; RUN: | FileCheck %s --check-prefix=CONTRACT-OFF ++ ++define void @xvfmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfmadd_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfmadd_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-ON-NEXT: xvfadd.s $xr0, $xr0, $xr1 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfmadd_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-OFF-NEXT: xvfadd.s $xr0, $xr0, $xr1 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %mul = fmul<8 x float> %v0, %v1 ++ %add = fadd<8 x float> %mul, %v2 ++ store <8 x float> %add, ptr %res ++ ret void ++} ++ ++define void @xvfmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfmsub_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfmsub_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr1 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfmsub_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr1 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %mul = fmul<8 x float> %v0, %v1 ++ %sub = fsub<8 x float> %mul, %v2 ++ store <8 x float> %sub, ptr %res ++ ret void ++} ++ ++define void @xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfnmadd_s: ++; CONTRACT-FAST: # %bb.0: 
# %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfnmadd_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-ON-NEXT: xvfadd.s $xr0, $xr0, $xr1 ++; CONTRACT-ON-NEXT: xvbitrevi.w $xr0, $xr0, 31 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfnmadd_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-OFF-NEXT: xvfadd.s $xr0, $xr0, $xr1 ++; CONTRACT-OFF-NEXT: xvbitrevi.w $xr0, $xr0, 31 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %mul = fmul<8 x float> %v0, %v1 ++ %add = fadd<8 x float> %mul, %v2 ++ %negadd = fneg<8 x float> %add ++ store <8 x float> %negadd, ptr %res ++ ret void ++} ++ ++define void @xvfnmadd_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfnmadd_s_nsz: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfnmadd_s_nsz: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-ON-NEXT: xvbitrevi.w $xr1, $xr1, 31 ++; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr1 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfnmadd_s_nsz: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-OFF-NEXT: xvbitrevi.w $xr1, $xr1, 31 ++; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr1 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %negv0 = fneg nsz<8 x float> %v0 ++ %negv2 = fneg nsz<8 x float> %v2 ++ %mul = fmul nsz<8 x float> %negv0, %v1 ++ %add = fadd nsz<8 x float> %mul, %negv2 ++ store <8 x float> %add, ptr %res ++ ret void ++} ++ ++;; Check that fnmadd.s is not emitted. 
++define void @not_xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: not_xvfnmadd_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvbitrevi.w $xr2, $xr2, 31 ++; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: not_xvfnmadd_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-ON-NEXT: xvbitrevi.w $xr1, $xr1, 31 ++; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr1 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: not_xvfnmadd_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-OFF-NEXT: xvbitrevi.w $xr1, $xr1, 31 ++; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr1 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %negv0 = fneg<8 x float> %v0 ++ %negv2 = fneg<8 x float> %v2 ++ %mul = fmul<8 x float> %negv0, %v1 ++ %add = fadd<8 x float> %mul, %negv2 ++ store <8 x float> %add, ptr %res ++ ret void ++} ++ ++define void @xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfnmsub_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfnmsub_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr1 ++; CONTRACT-ON-NEXT: xvbitrevi.w $xr0, $xr0, 31 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfnmsub_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr1 ++; CONTRACT-OFF-NEXT: xvbitrevi.w $xr0, $xr0, 31 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %negv2 = fneg<8 x float> %v2 ++ %mul = fmul<8 x float> %v0, %v1 ++ %add = fadd<8 x float> %mul, %negv2 ++ %neg = fneg<8 x float> %add ++ store <8 x float> %neg, ptr %res ++ ret void ++} ++ ++define void @xvfnmsub_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfnmsub_s_nsz: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; 
CONTRACT-ON-LABEL: xvfnmsub_s_nsz: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfnmsub_s_nsz: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %negv0 = fneg nsz<8 x float> %v0 ++ %mul = fmul nsz<8 x float> %negv0, %v1 ++ %add = fadd nsz<8 x float> %mul, %v2 ++ store <8 x float> %add, ptr %res ++ ret void ++} ++ ++;; Check that fnmsub.s is not emitted. ++define void @not_xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: not_xvfnmsub_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvbitrevi.w $xr2, $xr2, 31 ++; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: not_xvfnmsub_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: not_xvfnmsub_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %negv0 = fneg<8 x float> %v0 ++ %mul = fmul<8 x float> %negv0, %v1 ++ %add = fadd<8 x float> %mul, %v2 ++ store <8 x float> %add, ptr %res ++ ret void ++} ++ ++define void @contract_xvfmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_xvfmadd_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_xvfmadd_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_xvfmadd_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 
0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %mul = fmul contract <8 x float> %v0, %v1 ++ %add = fadd contract <8 x float> %mul, %v2 ++ store <8 x float> %add, ptr %res ++ ret void ++} ++ ++define void @contract_xvfmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_xvfmsub_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_xvfmsub_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_xvfmsub_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %mul = fmul contract <8 x float> %v0, %v1 ++ %sub = fsub contract <8 x float> %mul, %v2 ++ store <8 x float> %sub, ptr %res ++ ret void ++} ++ ++define void @contract_xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_xvfnmadd_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_xvfnmadd_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_xvfnmadd_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %mul = fmul contract <8 x float> %v0, %v1 ++ %add = fadd contract <8 x float> %mul, %v2 ++ %negadd = fneg contract <8 x float> %add ++ store <8 x float> %negadd, ptr %res ++ ret void ++} ++ ++define void @contract_xvfnmadd_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_xvfnmadd_s_nsz: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_xvfnmadd_s_nsz: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, 
$a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_xvfnmadd_s_nsz: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %negv0 = fneg contract nsz<8 x float> %v0 ++ %negv2 = fneg contract nsz<8 x float> %v2 ++ %mul = fmul contract nsz<8 x float> %negv0, %v1 ++ %add = fadd contract nsz<8 x float> %mul, %negv2 ++ store <8 x float> %add, ptr %res ++ ret void ++} ++ ++;; Check that fnmadd.s is not emitted. ++define void @not_contract_xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: not_contract_xvfnmadd_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvbitrevi.w $xr2, $xr2, 31 ++; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: not_contract_xvfnmadd_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvbitrevi.w $xr2, $xr2, 31 ++; CONTRACT-ON-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: not_contract_xvfnmadd_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvbitrevi.w $xr2, $xr2, 31 ++; CONTRACT-OFF-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %negv0 = fneg contract <8 x float> %v0 ++ %negv2 = fneg contract <8 x float> %v2 ++ %mul = fmul contract <8 x float> %negv0, %v1 ++ %add = fadd contract <8 x float> %mul, %negv2 ++ store <8 x float> %add, ptr %res ++ ret void ++} ++ ++define void @contract_xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_xvfnmsub_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_xvfnmsub_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_xvfnmsub_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 ++; 
CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %negv2 = fneg contract <8 x float> %v2 ++ %mul = fmul contract <8 x float> %v0, %v1 ++ %add = fadd contract <8 x float> %mul, %negv2 ++ %neg = fneg contract <8 x float> %add ++ store <8 x float> %neg, ptr %res ++ ret void ++} ++ ++define void @contract_xvfnmsub_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_xvfnmsub_s_nsz: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_xvfnmsub_s_nsz: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_xvfnmsub_s_nsz: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %negv0 = fneg contract nsz<8 x float> %v0 ++ %mul = fmul contract nsz<8 x float> %negv0, %v1 ++ %add = fadd contract nsz<8 x float> %mul, %v2 ++ store <8 x float> %add, ptr %res ++ ret void ++} ++ ++;; Check that fnmsub.s is not emitted. 
++define void @not_contract_xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: not_contract_xvfnmsub_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvbitrevi.w $xr2, $xr2, 31 ++; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: not_contract_xvfnmsub_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvbitrevi.w $xr2, $xr2, 31 ++; CONTRACT-ON-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: not_contract_xvfnmsub_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvbitrevi.w $xr2, $xr2, 31 ++; CONTRACT-OFF-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %negv0 = fneg contract <8 x float> %v0 ++ %mul = fmul contract <8 x float> %negv0, %v1 ++ %add = fadd contract <8 x float> %mul, %v2 ++ store <8 x float> %add, ptr %res ++ ret void ++} ++ ++define void @xvfmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfmadd_s_contract: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfmadd_s_contract: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfmadd_s_contract: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %mul = fmul contract <8 x float> %v0, %v1 ++ %add = fadd contract <8 x float> %mul, %v2 ++ store <8 x float> %add, ptr %res ++ ret void ++} ++ ++define void @xvfmsub_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfmsub_s_contract: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfmsub_s_contract: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 ++; 
CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfmsub_s_contract: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %mul = fmul contract <8 x float> %v0, %v1 ++ %sub = fsub contract <8 x float> %mul, %v2 ++ store <8 x float> %sub, ptr %res ++ ret void ++} ++ ++define void @xvfnmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfnmadd_s_contract: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfnmadd_s_contract: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfnmadd_s_contract: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %mul = fmul contract <8 x float> %v0, %v1 ++ %add = fadd contract <8 x float> %mul, %v2 ++ %negadd = fneg contract <8 x float> %add ++ store <8 x float> %negadd, ptr %res ++ ret void ++} ++ ++define void @xvfnmsub_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfnmsub_s_contract: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfnmsub_s_contract: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfnmsub_s_contract: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %mul = fmul contract <8 x float> %v0, %v1 ++ %negv2 = fneg contract <8 x float> %v2 ++ %add = fadd contract <8 x float> %negv2, %mul ++ %negadd = fneg contract <8 x float> %add ++ store <8 x float> %negadd, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll 
b/llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll +new file mode 100644 +index 000000000000..8e0459b4afab +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll +@@ -0,0 +1,804 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=fast < %s \ ++; RUN: | FileCheck %s --check-prefix=CONTRACT-FAST ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=on < %s \ ++; RUN: | FileCheck %s --check-prefix=CONTRACT-ON ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=off < %s \ ++; RUN: | FileCheck %s --check-prefix=CONTRACT-OFF ++ ++define void @vfmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfmadd_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfmadd_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-ON-NEXT: vfadd.d $vr0, $vr0, $vr1 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfmadd_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-OFF-NEXT: vfadd.d $vr0, $vr0, $vr1 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %mul = fmul<2 x double> %v0, %v1 ++ %add = fadd<2 x double> %mul, %v2 ++ store <2 x double> %add, ptr %res ++ ret void ++} ++ ++define void @vfmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfmsub_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfmsub_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr0, $vr1 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfmsub_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr0, $vr1 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %mul = fmul<2 x double> %v0, %v1 ++ %sub = fsub<2 x double> %mul, %v2 ++ store <2 x double> %sub, ptr %res ++ ret void ++} ++ ++define void @vfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfnmadd_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, 
$a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfnmadd_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-ON-NEXT: vfadd.d $vr0, $vr0, $vr1 ++; CONTRACT-ON-NEXT: vbitrevi.d $vr0, $vr0, 63 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfnmadd_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-OFF-NEXT: vfadd.d $vr0, $vr0, $vr1 ++; CONTRACT-OFF-NEXT: vbitrevi.d $vr0, $vr0, 63 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %mul = fmul<2 x double> %v0, %v1 ++ %add = fadd<2 x double> %mul, %v2 ++ %negadd = fneg<2 x double> %add ++ store <2 x double> %negadd, ptr %res ++ ret void ++} ++ ++define void @vfnmadd_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfnmadd_d_nsz: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfnmadd_d_nsz: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-ON-NEXT: vbitrevi.d $vr1, $vr1, 63 ++; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr0, $vr1 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfnmadd_d_nsz: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-OFF-NEXT: vbitrevi.d $vr1, $vr1, 63 ++; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr0, $vr1 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %negv0 = fneg nsz<2 x double> %v0 ++ %negv2 = fneg nsz<2 x double> %v2 ++ %mul = fmul nsz<2 x double> %negv0, %v1 ++ %add = fadd nsz<2 x double> %mul, %negv2 ++ store <2 x double> %add, ptr %res ++ ret void ++} ++ ++;; Check that vfnmadd.d is not emitted. 
++define void @not_vfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: not_vfnmadd_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vbitrevi.d $vr2, $vr2, 63 ++; CONTRACT-FAST-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: not_vfnmadd_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-ON-NEXT: vbitrevi.d $vr1, $vr1, 63 ++; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr0, $vr1 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: not_vfnmadd_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-OFF-NEXT: vbitrevi.d $vr1, $vr1, 63 ++; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr0, $vr1 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %negv0 = fneg<2 x double> %v0 ++ %negv2 = fneg<2 x double> %v2 ++ %mul = fmul<2 x double> %negv0, %v1 ++ %add = fadd<2 x double> %mul, %negv2 ++ store <2 x double> %add, ptr %res ++ ret void ++} ++ ++define void @vfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfnmsub_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfnmsub_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr0, $vr1 ++; CONTRACT-ON-NEXT: vbitrevi.d $vr0, $vr0, 63 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfnmsub_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr0, $vr1 ++; CONTRACT-OFF-NEXT: vbitrevi.d $vr0, $vr0, 63 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %negv2 = fneg<2 x double> %v2 ++ %mul = fmul<2 x double> %v0, %v1 ++ %add = fadd<2 x double> %mul, %negv2 ++ %neg = fneg<2 x double> %add ++ store <2 x double> %neg, ptr %res ++ ret void ++} ++ ++define void @vfnmsub_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfnmsub_d_nsz: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfnmsub_d_nsz: ++; 
CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfnmsub_d_nsz: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %negv0 = fneg nsz<2 x double> %v0 ++ %mul = fmul nsz<2 x double> %negv0, %v1 ++ %add = fadd nsz<2 x double> %mul, %v2 ++ store <2 x double> %add, ptr %res ++ ret void ++} ++ ++;; Check that vfnmsub.d is not emitted. ++define void @not_vfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: not_vfnmsub_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vbitrevi.d $vr2, $vr2, 63 ++; CONTRACT-FAST-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: not_vfnmsub_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: not_vfnmsub_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %negv0 = fneg<2 x double> %v0 ++ %mul = fmul<2 x double> %negv0, %v1 ++ %add = fadd<2 x double> %mul, %v2 ++ store <2 x double> %add, ptr %res ++ ret void ++} ++ ++define void @contract_vfmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_vfmadd_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_vfmadd_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_vfmadd_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ 
%v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %mul = fmul contract <2 x double> %v0, %v1 ++ %add = fadd contract <2 x double> %mul, %v2 ++ store <2 x double> %add, ptr %res ++ ret void ++} ++ ++define void @contract_vfmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_vfmsub_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_vfmsub_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_vfmsub_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %mul = fmul contract <2 x double> %v0, %v1 ++ %sub = fsub contract <2 x double> %mul, %v2 ++ store <2 x double> %sub, ptr %res ++ ret void ++} ++ ++define void @contract_vfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_vfnmadd_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_vfnmadd_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_vfnmadd_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %mul = fmul contract <2 x double> %v0, %v1 ++ %add = fadd contract <2 x double> %mul, %v2 ++ %negadd = fneg contract <2 x double> %add ++ store <2 x double> %negadd, ptr %res ++ ret void ++} ++ ++define void @contract_vfnmadd_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_vfnmadd_d_nsz: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_vfnmadd_d_nsz: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: 
vfnmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_vfnmadd_d_nsz: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %negv0 = fneg contract nsz<2 x double> %v0 ++ %negv2 = fneg contract nsz<2 x double> %v2 ++ %mul = fmul contract nsz<2 x double> %negv0, %v1 ++ %add = fadd contract nsz<2 x double> %mul, %negv2 ++ store <2 x double> %add, ptr %res ++ ret void ++} ++ ++;; Check that vfnmadd.d is not emitted. ++define void @not_contract_vfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: not_contract_vfnmadd_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vbitrevi.d $vr2, $vr2, 63 ++; CONTRACT-FAST-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: not_contract_vfnmadd_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vbitrevi.d $vr2, $vr2, 63 ++; CONTRACT-ON-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: not_contract_vfnmadd_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vbitrevi.d $vr2, $vr2, 63 ++; CONTRACT-OFF-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %negv0 = fneg contract <2 x double> %v0 ++ %negv2 = fneg contract <2 x double> %v2 ++ %mul = fmul contract <2 x double> %negv0, %v1 ++ %add = fadd contract <2 x double> %mul, %negv2 ++ store <2 x double> %add, ptr %res ++ ret void ++} ++ ++define void @contract_vfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_vfnmsub_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_vfnmsub_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_vfnmsub_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr 
%a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %negv2 = fneg contract <2 x double> %v2 ++ %mul = fmul contract <2 x double> %v0, %v1 ++ %add = fadd contract <2 x double> %mul, %negv2 ++ %neg = fneg contract <2 x double> %add ++ store <2 x double> %neg, ptr %res ++ ret void ++} ++ ++define void @contract_vfnmsub_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_vfnmsub_d_nsz: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_vfnmsub_d_nsz: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_vfnmsub_d_nsz: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %negv0 = fneg contract nsz<2 x double> %v0 ++ %mul = fmul contract nsz<2 x double> %negv0, %v1 ++ %add = fadd contract nsz<2 x double> %mul, %v2 ++ store <2 x double> %add, ptr %res ++ ret void ++} ++ ++;; Check that vfnmsub.d is not emitted. ++define void @not_contract_vfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: not_contract_vfnmsub_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vbitrevi.d $vr2, $vr2, 63 ++; CONTRACT-FAST-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: not_contract_vfnmsub_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vbitrevi.d $vr2, $vr2, 63 ++; CONTRACT-ON-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: not_contract_vfnmsub_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vbitrevi.d $vr2, $vr2, 63 ++; CONTRACT-OFF-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %negv0 = fneg contract <2 x double> %v0 ++ %mul = fmul contract <2 x double> %negv0, %v1 ++ %add = fadd contract <2 x double> %mul, %v2 ++ store <2 x double> %add, ptr %res ++ ret void ++} ++ ++define void @vfmadd_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfmadd_d_contract: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: 
vfmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfmadd_d_contract: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfmadd_d_contract: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %mul = fmul contract <2 x double> %v0, %v1 ++ %add = fadd contract <2 x double> %mul, %v2 ++ store <2 x double> %add, ptr %res ++ ret void ++} ++ ++define void @vfmsub_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfmsub_d_contract: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfmsub_d_contract: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfmsub_d_contract: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %mul = fmul contract <2 x double> %v0, %v1 ++ %sub = fsub contract <2 x double> %mul, %v2 ++ store <2 x double> %sub, ptr %res ++ ret void ++} ++ ++define void @vfnmadd_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfnmadd_d_contract: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfnmadd_d_contract: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfnmadd_d_contract: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %mul = fmul contract <2 x double> %v0, %v1 ++ 
%add = fadd contract <2 x double> %mul, %v2 ++ %negadd = fneg contract <2 x double> %add ++ store <2 x double> %negadd, ptr %res ++ ret void ++} ++ ++define void @vfnmsub_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfnmsub_d_contract: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfnmsub_d_contract: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfnmsub_d_contract: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %mul = fmul contract <2 x double> %v0, %v1 ++ %negv2 = fneg contract <2 x double> %v2 ++ %add = fadd contract <2 x double> %negv2, %mul ++ %negadd = fneg contract <2 x double> %add ++ store <2 x double> %negadd, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll b/llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll +new file mode 100644 +index 000000000000..7efbd61c0c4f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll +@@ -0,0 +1,804 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=fast < %s \ ++; RUN: | FileCheck %s --check-prefix=CONTRACT-FAST ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=on < %s \ ++; RUN: | FileCheck %s --check-prefix=CONTRACT-ON ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=off < %s \ ++; RUN: | FileCheck %s --check-prefix=CONTRACT-OFF ++ ++define void @vfmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfmadd_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfmadd_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-ON-NEXT: vfadd.s $vr0, $vr0, $vr1 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfmadd_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-OFF-NEXT: vfadd.s $vr0, $vr0, $vr1 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %mul = fmul<4 x float> %v0, %v1 ++ %add = fadd<4 x float> %mul, %v2 ++ 
store <4 x float> %add, ptr %res ++ ret void ++} ++ ++define void @vfmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfmsub_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfmsub_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr0, $vr1 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfmsub_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr0, $vr1 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %mul = fmul<4 x float> %v0, %v1 ++ %sub = fsub<4 x float> %mul, %v2 ++ store <4 x float> %sub, ptr %res ++ ret void ++} ++ ++define void @vfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfnmadd_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfnmadd_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-ON-NEXT: vfadd.s $vr0, $vr0, $vr1 ++; CONTRACT-ON-NEXT: vbitrevi.w $vr0, $vr0, 31 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfnmadd_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-OFF-NEXT: vfadd.s $vr0, $vr0, $vr1 ++; CONTRACT-OFF-NEXT: vbitrevi.w $vr0, $vr0, 31 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %mul = fmul<4 x float> %v0, %v1 ++ %add = fadd<4 x float> %mul, %v2 ++ %negadd = fneg<4 x float> %add ++ store <4 x float> %negadd, ptr %res ++ ret void ++} ++ ++define void @vfnmadd_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfnmadd_s_nsz: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfnmadd_s_nsz: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-ON-NEXT: vbitrevi.w $vr1, $vr1, 31 ++; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vld $vr1, $a3, 
0 ++; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr0, $vr1 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfnmadd_s_nsz: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-OFF-NEXT: vbitrevi.w $vr1, $vr1, 31 ++; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr0, $vr1 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %negv0 = fneg nsz<4 x float> %v0 ++ %negv2 = fneg nsz<4 x float> %v2 ++ %mul = fmul nsz<4 x float> %negv0, %v1 ++ %add = fadd nsz<4 x float> %mul, %negv2 ++ store <4 x float> %add, ptr %res ++ ret void ++} ++ ++;; Check that vfnmadd.s is not emitted. ++define void @not_vfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: not_vfnmadd_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vbitrevi.w $vr2, $vr2, 31 ++; CONTRACT-FAST-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: not_vfnmadd_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-ON-NEXT: vbitrevi.w $vr1, $vr1, 31 ++; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr0, $vr1 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: not_vfnmadd_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-OFF-NEXT: vbitrevi.w $vr1, $vr1, 31 ++; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr0, $vr1 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %negv0 = fneg<4 x float> %v0 ++ %negv2 = fneg<4 x float> %v2 ++ %mul = fmul<4 x float> %negv0, %v1 ++ %add = fadd<4 x float> %mul, %negv2 ++ store <4 x float> %add, ptr %res ++ ret void ++} ++ ++define void @vfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfnmsub_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfnmsub_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr0, $vr1 ++; CONTRACT-ON-NEXT: vbitrevi.w $vr0, $vr0, 31 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfnmsub_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr0, $vr1 ++; 
CONTRACT-OFF-NEXT: vbitrevi.w $vr0, $vr0, 31 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %negv2 = fneg<4 x float> %v2 ++ %mul = fmul<4 x float> %v0, %v1 ++ %add = fadd<4 x float> %mul, %negv2 ++ %neg = fneg<4 x float> %add ++ store <4 x float> %neg, ptr %res ++ ret void ++} ++ ++define void @vfnmsub_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfnmsub_s_nsz: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfnmsub_s_nsz: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfnmsub_s_nsz: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %negv0 = fneg nsz<4 x float> %v0 ++ %mul = fmul nsz<4 x float> %negv0, %v1 ++ %add = fadd nsz<4 x float> %mul, %v2 ++ store <4 x float> %add, ptr %res ++ ret void ++} ++ ++;; Check that vfnmsub.s is not emitted. 
++define void @not_vfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: not_vfnmsub_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vbitrevi.w $vr2, $vr2, 31 ++; CONTRACT-FAST-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: not_vfnmsub_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: not_vfnmsub_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %negv0 = fneg<4 x float> %v0 ++ %mul = fmul<4 x float> %negv0, %v1 ++ %add = fadd<4 x float> %mul, %v2 ++ store <4 x float> %add, ptr %res ++ ret void ++} ++ ++define void @contract_vfmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_vfmadd_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_vfmadd_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_vfmadd_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %mul = fmul contract <4 x float> %v0, %v1 ++ %add = fadd contract <4 x float> %mul, %v2 ++ store <4 x float> %add, ptr %res ++ ret void ++} ++ ++define void @contract_vfmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_vfmsub_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_vfmsub_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_vfmsub_s: ++; CONTRACT-OFF: # %bb.0: 
# %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %mul = fmul contract <4 x float> %v0, %v1 ++ %sub = fsub contract <4 x float> %mul, %v2 ++ store <4 x float> %sub, ptr %res ++ ret void ++} ++ ++define void @contract_vfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_vfnmadd_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_vfnmadd_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_vfnmadd_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %mul = fmul contract <4 x float> %v0, %v1 ++ %add = fadd contract <4 x float> %mul, %v2 ++ %negadd = fneg contract <4 x float> %add ++ store <4 x float> %negadd, ptr %res ++ ret void ++} ++ ++define void @contract_vfnmadd_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_vfnmadd_s_nsz: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_vfnmadd_s_nsz: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_vfnmadd_s_nsz: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %negv0 = fneg contract nsz<4 x float> %v0 ++ %negv2 = fneg contract nsz<4 x float> %v2 ++ %mul = fmul contract nsz<4 x float> %negv0, %v1 ++ %add = fadd contract nsz<4 x float> %mul, %negv2 ++ store <4 x float> %add, ptr %res ++ ret void ++} ++ ++;; Check that vfnmadd.s is not emitted. 
++define void @not_contract_vfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: not_contract_vfnmadd_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vbitrevi.w $vr2, $vr2, 31 ++; CONTRACT-FAST-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: not_contract_vfnmadd_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vbitrevi.w $vr2, $vr2, 31 ++; CONTRACT-ON-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: not_contract_vfnmadd_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vbitrevi.w $vr2, $vr2, 31 ++; CONTRACT-OFF-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %negv0 = fneg contract <4 x float> %v0 ++ %negv2 = fneg contract <4 x float> %v2 ++ %mul = fmul contract <4 x float> %negv0, %v1 ++ %add = fadd contract <4 x float> %mul, %negv2 ++ store <4 x float> %add, ptr %res ++ ret void ++} ++ ++define void @contract_vfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_vfnmsub_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_vfnmsub_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_vfnmsub_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %negv2 = fneg contract <4 x float> %v2 ++ %mul = fmul contract <4 x float> %v0, %v1 ++ %add = fadd contract <4 x float> %mul, %negv2 ++ %neg = fneg contract <4 x float> %add ++ store <4 x float> %neg, ptr %res ++ ret void ++} ++ ++define void @contract_vfnmsub_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_vfnmsub_s_nsz: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_vfnmsub_s_nsz: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, 
$a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_vfnmsub_s_nsz: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %negv0 = fneg contract nsz<4 x float> %v0 ++ %mul = fmul contract nsz<4 x float> %negv0, %v1 ++ %add = fadd contract nsz<4 x float> %mul, %v2 ++ store <4 x float> %add, ptr %res ++ ret void ++} ++ ++;; Check that vfnmsub.s is not emitted. ++define void @not_contract_vfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: not_contract_vfnmsub_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vbitrevi.w $vr2, $vr2, 31 ++; CONTRACT-FAST-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: not_contract_vfnmsub_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vbitrevi.w $vr2, $vr2, 31 ++; CONTRACT-ON-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: not_contract_vfnmsub_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vbitrevi.w $vr2, $vr2, 31 ++; CONTRACT-OFF-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %negv0 = fneg contract <4 x float> %v0 ++ %mul = fmul contract <4 x float> %negv0, %v1 ++ %add = fadd contract <4 x float> %mul, %v2 ++ store <4 x float> %add, ptr %res ++ ret void ++} ++ ++define void @vfmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfmadd_s_contract: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfmadd_s_contract: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfmadd_s_contract: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %mul = 
fmul contract <4 x float> %v0, %v1 ++ %add = fadd contract <4 x float> %mul, %v2 ++ store <4 x float> %add, ptr %res ++ ret void ++} ++ ++define void @vfmsub_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfmsub_s_contract: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfmsub_s_contract: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfmsub_s_contract: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %mul = fmul contract <4 x float> %v0, %v1 ++ %sub = fsub contract <4 x float> %mul, %v2 ++ store <4 x float> %sub, ptr %res ++ ret void ++} ++ ++define void @vfnmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfnmadd_s_contract: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfnmadd_s_contract: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfnmadd_s_contract: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %mul = fmul contract <4 x float> %v0, %v1 ++ %add = fadd contract <4 x float> %mul, %v2 ++ %negadd = fneg contract <4 x float> %add ++ store <4 x float> %negadd, ptr %res ++ ret void ++} ++ ++define void @vfnmsub_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfnmsub_s_contract: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfnmsub_s_contract: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; 
CONTRACT-OFF-LABEL: vfnmsub_s_contract: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %mul = fmul contract <4 x float> %v0, %v1 ++ %negv2 = fneg contract <4 x float> %v2 ++ %add = fadd contract <4 x float> %negv2, %mul ++ %negadd = fneg contract <4 x float> %add ++ store <4 x float> %negadd, ptr %res ++ ret void ++} +-- +2.20.1 + diff --git a/0033-LoongArch-Fix-LASX-vector_extract-codegen.patch b/0033-LoongArch-Fix-LASX-vector_extract-codegen.patch new file mode 100644 index 0000000..672ce0f --- /dev/null +++ b/0033-LoongArch-Fix-LASX-vector_extract-codegen.patch @@ -0,0 +1,328 @@ +From 0c21388d176b7f6d9249f47487c4368eec0ae508 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Mon, 25 Dec 2023 10:09:20 +0800 +Subject: [PATCH 33/42] [LoongArch] Fix LASX vector_extract codegen + +Custom lowering `ISD::EXTRACT_VECTOR_ELT` with lasx. + +(cherry picked from commit 47c88bcd5de91522241cca1aaa1b7762ceb01394) + +--- + .../LoongArch/LoongArchISelLowering.cpp | 21 +++- + .../Target/LoongArch/LoongArchISelLowering.h | 1 + + .../LoongArch/LoongArchLASXInstrInfo.td | 40 ++---- + .../lasx/ir-instruction/extractelement.ll | 114 ++++++++++++++---- + 4 files changed, 119 insertions(+), 57 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index c7f4b1d24f07..cf881ce720a6 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -277,7 +277,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction(ISD::UNDEF, VT, Legal); + + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); +- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); ++ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + + setOperationAction(ISD::SETCC, VT, Legal); +@@ -395,6 +395,8 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, + return lowerWRITE_REGISTER(Op, DAG); + case ISD::INSERT_VECTOR_ELT: + return lowerINSERT_VECTOR_ELT(Op, DAG); ++ case ISD::EXTRACT_VECTOR_ELT: ++ return lowerEXTRACT_VECTOR_ELT(Op, DAG); + case ISD::BUILD_VECTOR: + return lowerBUILD_VECTOR(Op, DAG); + case ISD::VECTOR_SHUFFLE: +@@ -502,6 +504,23 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op, + return SDValue(); + } + ++SDValue ++LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, ++ SelectionDAG &DAG) const { ++ EVT VecTy = Op->getOperand(0)->getValueType(0); ++ SDValue Idx = Op->getOperand(1); ++ EVT EltTy = VecTy.getVectorElementType(); ++ unsigned NumElts = VecTy.getVectorNumElements(); ++ ++ if (isa(Idx) && ++ (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 || ++ EltTy == MVT::f64 || ++ cast(Idx)->getZExtValue() < NumElts / 2)) ++ return Op; ++ ++ return SDValue(); ++} ++ + SDValue + LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +index 2c35f9e5d378..6b5a851ec55d 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h ++++ 
b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +@@ -279,6 +279,7 @@ private: + SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index ec6983d0f487..9b7a34688811 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -1590,38 +1590,14 @@ def : Pat<(i64 (vector_extract v32i8:$xj, uimm4:$imm)), + (VPICKVE2GR_B (EXTRACT_SUBREG v32i8:$xj, sub_128), uimm4:$imm)>; + def : Pat<(i64 (vector_extract v16i16:$xj, uimm3:$imm)), + (VPICKVE2GR_H (EXTRACT_SUBREG v16i16:$xj, sub_128), uimm3:$imm)>; +-def : Pat<(i64 (vector_extract v8i32:$xj, uimm2:$imm)), +- (VPICKVE2GR_W (EXTRACT_SUBREG v8i32:$xj, sub_128), uimm2:$imm)>; +-def : Pat<(i64 (vector_extract v4i64:$xj, uimm1:$imm)), +- (VPICKVE2GR_D (EXTRACT_SUBREG v4i64:$xj, sub_128), uimm1:$imm)>; +-def : Pat<(f32 (vector_extract v8f32:$xj, uimm2:$imm)), +- (f32 (EXTRACT_SUBREG (XVREPL128VEI_W v8f32:$xj, uimm2:$imm), sub_32))>; +-def : Pat<(f64 (vector_extract v4f64:$xj, uimm1:$imm)), +- (f64 (EXTRACT_SUBREG (XVREPL128VEI_D v4f64:$xj, uimm1:$imm), sub_64))>; +- +-// Vector extraction with variable index. +-def : Pat<(i64 (vector_extract v32i8:$xj, i64:$rk)), +- (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_B v32i8:$xj, +- i64:$rk), +- sub_32)), +- GPR), (i64 24))>; +-def : Pat<(i64 (vector_extract v16i16:$xj, i64:$rk)), +- (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_H v16i16:$xj, +- i64:$rk), +- sub_32)), +- GPR), (i64 16))>; +-def : Pat<(i64 (vector_extract v8i32:$xj, i64:$rk)), +- (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_W v8i32:$xj, i64:$rk), +- sub_32)), +- GPR)>; +-def : Pat<(i64 (vector_extract v4i64:$xj, i64:$rk)), +- (COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (XVREPLVE_D v4i64:$xj, i64:$rk), +- sub_64)), +- GPR)>; +-def : Pat<(f32 (vector_extract v8f32:$xj, i64:$rk)), +- (f32 (EXTRACT_SUBREG (XVREPLVE_W v8f32:$xj, i64:$rk), sub_32))>; +-def : Pat<(f64 (vector_extract v4f64:$xj, i64:$rk)), +- (f64 (EXTRACT_SUBREG (XVREPLVE_D v4f64:$xj, i64:$rk), sub_64))>; ++def : Pat<(i64 (vector_extract v8i32:$xj, uimm3:$imm)), ++ (XVPICKVE2GR_W v8i32:$xj, uimm3:$imm)>; ++def : Pat<(i64 (vector_extract v4i64:$xj, uimm2:$imm)), ++ (XVPICKVE2GR_D v4i64:$xj, uimm2:$imm)>; ++def : Pat<(f32 (vector_extract v8f32:$xj, uimm3:$imm)), ++ (MOVGR2FR_W (XVPICKVE2GR_W v8f32:$xj, uimm3:$imm))>; ++def : Pat<(f64 (vector_extract v4f64:$xj, uimm2:$imm)), ++ (MOVGR2FR_D (XVPICKVE2GR_D v4f64:$xj, uimm2:$imm))>; + + // vselect + def : Pat<(v32i8 (vselect LASX256:$xj, LASX256:$xd, +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll +index 78f584cd09a8..02b76bf75b75 100644 +--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll +@@ -31,7 +31,7 @@ define void @extract_8xi32(ptr %src, ptr %dst) nounwind { + ; CHECK-LABEL: extract_8xi32: + ; CHECK: # %bb.0: + ; 
CHECK-NEXT: xvld $xr0, $a0, 0 +-; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1 ++; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1 + ; CHECK-NEXT: st.w $a0, $a1, 0 + ; CHECK-NEXT: ret + %v = load volatile <8 x i32>, ptr %src +@@ -44,7 +44,7 @@ define void @extract_4xi64(ptr %src, ptr %dst) nounwind { + ; CHECK-LABEL: extract_4xi64: + ; CHECK: # %bb.0: + ; CHECK-NEXT: xvld $xr0, $a0, 0 +-; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1 ++; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1 + ; CHECK-NEXT: st.d $a0, $a1, 0 + ; CHECK-NEXT: ret + %v = load volatile <4 x i64>, ptr %src +@@ -57,8 +57,8 @@ define void @extract_8xfloat(ptr %src, ptr %dst) nounwind { + ; CHECK-LABEL: extract_8xfloat: + ; CHECK: # %bb.0: + ; CHECK-NEXT: xvld $xr0, $a0, 0 +-; CHECK-NEXT: ori $a0, $zero, 7 +-; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a0 ++; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7 ++; CHECK-NEXT: movgr2fr.w $fa0, $a0 + ; CHECK-NEXT: fst.s $fa0, $a1, 0 + ; CHECK-NEXT: ret + %v = load volatile <8 x float>, ptr %src +@@ -71,8 +71,8 @@ define void @extract_4xdouble(ptr %src, ptr %dst) nounwind { + ; CHECK-LABEL: extract_4xdouble: + ; CHECK: # %bb.0: + ; CHECK-NEXT: xvld $xr0, $a0, 0 +-; CHECK-NEXT: ori $a0, $zero, 3 +-; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a0 ++; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3 ++; CHECK-NEXT: movgr2fr.d $fa0, $a0 + ; CHECK-NEXT: fst.d $fa0, $a1, 0 + ; CHECK-NEXT: ret + %v = load volatile <4 x double>, ptr %src +@@ -84,12 +84,22 @@ define void @extract_4xdouble(ptr %src, ptr %dst) nounwind { + define void @extract_32xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind { + ; CHECK-LABEL: extract_32xi8_idx: + ; CHECK: # %bb.0: +-; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: addi.d $sp, $sp, -64 ++; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: addi.d $fp, $sp, 64 ++; CHECK-NEXT: srli.d $a3, $sp, 5 ++; CHECK-NEXT: slli.d $sp, $a3, 5 + ; CHECK-NEXT: xvld $xr0, $a0, 0 +-; CHECK-NEXT: xvreplve.b $xr0, $xr0, $a2 +-; CHECK-NEXT: movfr2gr.s $a0, $fa0 +-; CHECK-NEXT: srai.w $a0, $a0, 24 ++; CHECK-NEXT: xvst $xr0, $sp, 0 ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a2, 4, 0 ++; CHECK-NEXT: ld.b $a0, $a0, 0 + ; CHECK-NEXT: st.b $a0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $fp, -64 ++; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 64 + ; CHECK-NEXT: ret + %v = load volatile <32 x i8>, ptr %src + %e = extractelement <32 x i8> %v, i32 %idx +@@ -100,12 +110,22 @@ define void @extract_32xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind { + define void @extract_16xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind { + ; CHECK-LABEL: extract_16xi16_idx: + ; CHECK: # %bb.0: +-; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: addi.d $sp, $sp, -64 ++; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: addi.d $fp, $sp, 64 ++; CHECK-NEXT: srli.d $a3, $sp, 5 ++; CHECK-NEXT: slli.d $sp, $a3, 5 + ; CHECK-NEXT: xvld $xr0, $a0, 0 +-; CHECK-NEXT: xvreplve.h $xr0, $xr0, $a2 +-; CHECK-NEXT: movfr2gr.s $a0, $fa0 +-; CHECK-NEXT: srai.w $a0, $a0, 16 ++; CHECK-NEXT: xvst $xr0, $sp, 0 ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a2, 4, 1 ++; CHECK-NEXT: ld.h $a0, $a0, 0 + ; CHECK-NEXT: st.h $a0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $fp, -64 ++; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d 
$sp, $sp, 64 + ; CHECK-NEXT: ret + %v = load volatile <16 x i16>, ptr %src + %e = extractelement <16 x i16> %v, i32 %idx +@@ -116,11 +136,22 @@ define void @extract_16xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind { + define void @extract_8xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind { + ; CHECK-LABEL: extract_8xi32_idx: + ; CHECK: # %bb.0: +-; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: addi.d $sp, $sp, -64 ++; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: addi.d $fp, $sp, 64 ++; CHECK-NEXT: srli.d $a3, $sp, 5 ++; CHECK-NEXT: slli.d $sp, $a3, 5 + ; CHECK-NEXT: xvld $xr0, $a0, 0 +-; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a2 +-; CHECK-NEXT: movfr2gr.s $a0, $fa0 ++; CHECK-NEXT: xvst $xr0, $sp, 0 ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2 ++; CHECK-NEXT: ld.w $a0, $a0, 0 + ; CHECK-NEXT: st.w $a0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $fp, -64 ++; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 64 + ; CHECK-NEXT: ret + %v = load volatile <8 x i32>, ptr %src + %e = extractelement <8 x i32> %v, i32 %idx +@@ -131,11 +162,22 @@ define void @extract_8xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind { + define void @extract_4xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind { + ; CHECK-LABEL: extract_4xi64_idx: + ; CHECK: # %bb.0: +-; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: addi.d $sp, $sp, -64 ++; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: addi.d $fp, $sp, 64 ++; CHECK-NEXT: srli.d $a3, $sp, 5 ++; CHECK-NEXT: slli.d $sp, $a3, 5 + ; CHECK-NEXT: xvld $xr0, $a0, 0 +-; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a2 +-; CHECK-NEXT: movfr2gr.d $a0, $fa0 ++; CHECK-NEXT: xvst $xr0, $sp, 0 ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3 ++; CHECK-NEXT: ld.d $a0, $a0, 0 + ; CHECK-NEXT: st.d $a0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $fp, -64 ++; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 64 + ; CHECK-NEXT: ret + %v = load volatile <4 x i64>, ptr %src + %e = extractelement <4 x i64> %v, i32 %idx +@@ -146,10 +188,22 @@ define void @extract_4xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind { + define void @extract_8xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind { + ; CHECK-LABEL: extract_8xfloat_idx: + ; CHECK: # %bb.0: +-; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: addi.d $sp, $sp, -64 ++; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: addi.d $fp, $sp, 64 ++; CHECK-NEXT: srli.d $a3, $sp, 5 ++; CHECK-NEXT: slli.d $sp, $a3, 5 + ; CHECK-NEXT: xvld $xr0, $a0, 0 +-; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a2 ++; CHECK-NEXT: xvst $xr0, $sp, 0 ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2 ++; CHECK-NEXT: fld.s $fa0, $a0, 0 + ; CHECK-NEXT: fst.s $fa0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $fp, -64 ++; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 64 + ; CHECK-NEXT: ret + %v = load volatile <8 x float>, ptr %src + %e = extractelement <8 x float> %v, i32 %idx +@@ -160,10 +214,22 @@ define void @extract_8xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind { + define void @extract_4xdouble_idx(ptr 
%src, ptr %dst, i32 %idx) nounwind { + ; CHECK-LABEL: extract_4xdouble_idx: + ; CHECK: # %bb.0: +-; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: addi.d $sp, $sp, -64 ++; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: addi.d $fp, $sp, 64 ++; CHECK-NEXT: srli.d $a3, $sp, 5 ++; CHECK-NEXT: slli.d $sp, $a3, 5 + ; CHECK-NEXT: xvld $xr0, $a0, 0 +-; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a2 ++; CHECK-NEXT: xvst $xr0, $sp, 0 ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3 ++; CHECK-NEXT: fld.d $fa0, $a0, 0 + ; CHECK-NEXT: fst.d $fa0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $fp, -64 ++; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 64 + ; CHECK-NEXT: ret + %v = load volatile <4 x double>, ptr %src + %e = extractelement <4 x double> %v, i32 %idx +-- +2.20.1 + diff --git a/0034-LoongArch-Fix-incorrect-pattern-XVREPL128VEI_-W-D-in.patch b/0034-LoongArch-Fix-incorrect-pattern-XVREPL128VEI_-W-D-in.patch new file mode 100644 index 0000000..7b0f1db --- /dev/null +++ b/0034-LoongArch-Fix-incorrect-pattern-XVREPL128VEI_-W-D-in.patch @@ -0,0 +1,60 @@ +From 487d08e5156f328b1f2d8da7b51c4c04c4e117d6 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Wed, 27 Dec 2023 16:31:49 +0800 +Subject: [PATCH 34/42] [LoongArch] Fix incorrect pattern XVREPL128VEI_{W/D} + instructions + +Remove the incorrect patterns for `XVREPL128VEI_{W/D}` instructions, +and add correct patterns for XVREPLVE0_{W/D} instructions + +(cherry picked from commit c7367f985e0d27aeb8bc993406d1b9f4ca307399) + +--- + llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td | 6 +++--- + llvm/test/CodeGen/LoongArch/lasx/build-vector.ll | 4 ++-- + 2 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index 9b7a34688811..059689cef840 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -1571,11 +1571,11 @@ def : Pat<(loongarch_vreplve v8i32:$xj, GRLenVT:$rk), + def : Pat<(loongarch_vreplve v4i64:$xj, GRLenVT:$rk), + (XVREPLVE_D v4i64:$xj, GRLenVT:$rk)>; + +-// XVREPL128VEI_{W/D} ++// XVREPLVE0_{W/D} + def : Pat<(lasxsplatf32 FPR32:$fj), +- (XVREPL128VEI_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)>; ++ (XVREPLVE0_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32))>; + def : Pat<(lasxsplatf64 FPR64:$fj), +- (XVREPL128VEI_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)>; ++ (XVREPLVE0_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64))>; + + // Loads/Stores + foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in { +diff --git a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll +index 6824ab5cda8d..ae6f31aaec64 100644 +--- a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll ++++ b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll +@@ -57,7 +57,7 @@ define void @buildvector_v8f32_splat(ptr %dst, float %a0) nounwind { + ; CHECK-LABEL: buildvector_v8f32_splat: + ; CHECK: # %bb.0: # %entry + ; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0 +-; CHECK-NEXT: xvrepl128vei.w $xr0, $xr0, 0 ++; CHECK-NEXT: xvreplve0.w $xr0, $xr0 + ; CHECK-NEXT: xvst $xr0, $a0, 0 + ; CHECK-NEXT: ret + entry: +@@ -71,7 +71,7 @@ define void @buildvector_v4f64_splat(ptr %dst, double %a0) nounwind { + ; CHECK-LABEL: buildvector_v4f64_splat: + ; CHECK: # %bb.0: 
# %entry + ; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 +-; CHECK-NEXT: xvrepl128vei.d $xr0, $xr0, 0 ++; CHECK-NEXT: xvreplve0.d $xr0, $xr0 + ; CHECK-NEXT: xvst $xr0, $a0, 0 + ; CHECK-NEXT: ret + entry: +-- +2.20.1 + diff --git a/0035-LoongArch-Fix-incorrect-pattern-X-VBITSELI_B-instruc.patch b/0035-LoongArch-Fix-incorrect-pattern-X-VBITSELI_B-instruc.patch new file mode 100644 index 0000000..a1b72c2 --- /dev/null +++ b/0035-LoongArch-Fix-incorrect-pattern-X-VBITSELI_B-instruc.patch @@ -0,0 +1,88 @@ +From 5766f4f5b78d1a5e07022eda3fc46657bbfbe3a4 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Thu, 28 Dec 2023 20:56:32 +0800 +Subject: [PATCH 35/42] [LoongArch] Fix incorrect pattern [X]VBITSELI_B + instructions + +Adjusted the operand order of [X]VBITSELI_B to correctly match vselect. + +(cherry picked from commit da5378e87e11689d05a58198d6e15e9551916794) + +--- + llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td | 4 ++-- + llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td | 4 ++-- + llvm/test/CodeGen/LoongArch/lasx/vselect.ll | 6 +++--- + llvm/test/CodeGen/LoongArch/lsx/vselect.ll | 6 +++--- + 4 files changed, 10 insertions(+), 10 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index 059689cef840..b3c11bc5423d 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -1600,8 +1600,8 @@ def : Pat<(f64 (vector_extract v4f64:$xj, uimm2:$imm)), + (MOVGR2FR_D (XVPICKVE2GR_D v4f64:$xj, uimm2:$imm))>; + + // vselect +-def : Pat<(v32i8 (vselect LASX256:$xj, LASX256:$xd, +- (v32i8 (SplatPat_uimm8 uimm8:$imm)))), ++def : Pat<(v32i8 (vselect LASX256:$xd, (v32i8 (SplatPat_uimm8 uimm8:$imm)), ++ LASX256:$xj)), + (XVBITSELI_B LASX256:$xd, LASX256:$xj, uimm8:$imm)>; + foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in + def : Pat<(vt (vselect LASX256:$xa, LASX256:$xk, LASX256:$xj)), +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index e468176885d7..5569c2cd15b5 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -1731,8 +1731,8 @@ def : Pat<(f64 (vector_extract v2f64:$vj, i64:$rk)), + (f64 (EXTRACT_SUBREG (VREPLVE_D v2f64:$vj, i64:$rk), sub_64))>; + + // vselect +-def : Pat<(v16i8 (vselect LSX128:$vj, LSX128:$vd, +- (v16i8 (SplatPat_uimm8 uimm8:$imm)))), ++def : Pat<(v16i8 (vselect LSX128:$vd, (v16i8 (SplatPat_uimm8 uimm8:$imm)), ++ LSX128:$vj)), + (VBITSELI_B LSX128:$vd, LSX128:$vj, uimm8:$imm)>; + foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in + def : Pat<(vt (vselect LSX128:$va, LSX128:$vk, LSX128:$vj)), +diff --git a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll +index 24f4bcf752d3..ec2fc28db33c 100644 +--- a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll ++++ b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll +@@ -6,11 +6,11 @@ define void @select_v32i8_imm(ptr %res, ptr %a0) nounwind { + ; CHECK: # %bb.0: + ; CHECK-NEXT: xvld $xr0, $a1, 0 + ; CHECK-NEXT: xvrepli.h $xr1, -256 +-; CHECK-NEXT: xvbitseli.b $xr0, $xr1, 1 +-; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: xvbitseli.b $xr1, $xr0, 1 ++; CHECK-NEXT: xvst $xr1, $a0, 0 + ; CHECK-NEXT: ret + %v0 = load <32 x i8>, ptr %a0 +- %sel = select <32 x i1> , <32 x i8> %v0, <32 x i8> ++ %sel = select <32 x i1> , <32 x i8> , <32 x i8> %v0 + store <32 x i8> %sel, ptr %res + ret void + } 
+diff --git a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll +index 00e3d9313f13..746152f0f026 100644 +--- a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll ++++ b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll +@@ -6,11 +6,11 @@ define void @select_v16i8_imm(ptr %res, ptr %a0) nounwind { + ; CHECK: # %bb.0: + ; CHECK-NEXT: vld $vr0, $a1, 0 + ; CHECK-NEXT: vrepli.h $vr1, -256 +-; CHECK-NEXT: vbitseli.b $vr0, $vr1, 255 +-; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: vbitseli.b $vr1, $vr0, 255 ++; CHECK-NEXT: vst $vr1, $a0, 0 + ; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 +- %sel = select <16 x i1> , <16 x i8> %v0, <16 x i8> ++ %sel = select <16 x i1> , <16 x i8> , <16 x i8> %v0 + store <16 x i8> %sel, ptr %res + ret void + } +-- +2.20.1 + diff --git a/0036-Clang-LoongArch-Do-not-pass-vector-arguments-via-vec.patch b/0036-Clang-LoongArch-Do-not-pass-vector-arguments-via-vec.patch new file mode 100644 index 0000000..84d21d4 --- /dev/null +++ b/0036-Clang-LoongArch-Do-not-pass-vector-arguments-via-vec.patch @@ -0,0 +1,32009 @@ +From 90be01b428b95f0396db80eb5d12c9cea2a6fe20 Mon Sep 17 00:00:00 2001 +From: yjijd +Date: Tue, 2 Jan 2024 11:46:00 +0800 +Subject: [PATCH 36/42] [Clang][LoongArch] Do not pass vector arguments via + vector registers (#74990) + +psABI v2.30 clarifies that vector arguments are passed according to the +base ABI by default. + +(cherry picked from commit 0e01c72c5645259d9a08a1a7ed39cb5cc41ce311) + +--- + clang/lib/CodeGen/Targets/LoongArch.cpp | 7 - + .../CodeGen/LoongArch/lasx/builtin-alias.c | 4876 ++++++++++++----- + clang/test/CodeGen/LoongArch/lasx/builtin.c | 4874 +++++++++++----- + .../CodeGen/LoongArch/lsx/builtin-alias.c | 4746 +++++++++++----- + clang/test/CodeGen/LoongArch/lsx/builtin.c | 4746 +++++++++++----- + 5 files changed, 13485 insertions(+), 5764 deletions(-) + +diff --git a/clang/lib/CodeGen/Targets/LoongArch.cpp b/clang/lib/CodeGen/Targets/LoongArch.cpp +index 26c68c3583b2..7483bf6d6d1e 100644 +--- a/clang/lib/CodeGen/Targets/LoongArch.cpp ++++ b/clang/lib/CodeGen/Targets/LoongArch.cpp +@@ -321,13 +321,6 @@ ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, + return ABIArgInfo::getDirect(); + } + +- // Pass 128-bit/256-bit vector values via vector registers directly. +- if (Ty->isVectorType() && (((getContext().getTypeSize(Ty) == 128) && +- (getTarget().hasFeature("lsx"))) || +- ((getContext().getTypeSize(Ty) == 256) && +- getTarget().hasFeature("lasx")))) +- return ABIArgInfo::getDirect(); +- + // Complex types for the *f or *d ABI must be passed directly rather than + // using CoerceAndExpand. 
+ if (IsFixed && Ty->isComplexType() && FRLen && FARsLeft >= 2) { +diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c b/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c +index 09b2d5fcacf5..9a8ce224bcfd 100644 +--- a/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c ++++ b/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c +@@ -5,4426 +5,6382 @@ + + // CHECK-LABEL: @xvsll_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsll_b(v32i8 _1, v32i8 _2) { return __lasx_xvsll_b(_1, _2); } + // CHECK-LABEL: @xvsll_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsll_h(v16i16 _1, v16i16 _2) { return __lasx_xvsll_h(_1, _2); } + // CHECK-LABEL: @xvsll_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsll_w(v8i32 _1, v8i32 _2) { return __lasx_xvsll_w(_1, _2); } + // CHECK-LABEL: @xvsll_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsll_d(v4i64 _1, v4i64 _2) { return __lasx_xvsll_d(_1, _2); } + // CHECK-LABEL: @xvslli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: 
[[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvslli_b(v32i8 _1) { return __lasx_xvslli_b(_1, 1); } + // CHECK-LABEL: @xvslli_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvslli_h(v16i16 _1) { return __lasx_xvslli_h(_1, 1); } + // CHECK-LABEL: @xvslli_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvslli_w(v8i32 _1) { return __lasx_xvslli_w(_1, 1); } + // CHECK-LABEL: @xvslli_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvslli_d(v4i64 _1) { return __lasx_xvslli_d(_1, 1); } + // CHECK-LABEL: @xvsra_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsra_b(v32i8 _1, v32i8 _2) { return __lasx_xvsra_b(_1, _2); } + // CHECK-LABEL: @xvsra_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsra_h(v16i16 _1, v16i16 _2) { return __lasx_xvsra_h(_1, _2); } + // CHECK-LABEL: @xvsra_w( + // CHECK-NEXT: entry: +-// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsra_w(v8i32 _1, v8i32 _2) { return __lasx_xvsra_w(_1, _2); } + // CHECK-LABEL: @xvsra_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsra_d(v4i64 _1, v4i64 _2) { return __lasx_xvsra_d(_1, _2); } + // CHECK-LABEL: @xvsrai_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrai_b(v32i8 _1) { return __lasx_xvsrai_b(_1, 1); } + // CHECK-LABEL: @xvsrai_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrai_h(v16i16 _1) { return __lasx_xvsrai_h(_1, 1); } + // CHECK-LABEL: @xvsrai_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrai_w(v8i32 _1) { return __lasx_xvsrai_w(_1, 1); } + // CHECK-LABEL: @xvsrai_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1]], i32 1) ++// 
CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrai_d(v4i64 _1) { return __lasx_xvsrai_d(_1, 1); } + // CHECK-LABEL: @xvsrar_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrar_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrar_b(_1, _2); } + // CHECK-LABEL: @xvsrar_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrar_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrar_h(_1, _2); } + // CHECK-LABEL: @xvsrar_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrar_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrar_w(_1, _2); } + // CHECK-LABEL: @xvsrar_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrar_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrar_d(_1, _2); } + // CHECK-LABEL: @xvsrari_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, 
!tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrari_b(v32i8 _1) { return __lasx_xvsrari_b(_1, 1); } + // CHECK-LABEL: @xvsrari_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrari_h(v16i16 _1) { return __lasx_xvsrari_h(_1, 1); } + // CHECK-LABEL: @xvsrari_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrari_w(v8i32 _1) { return __lasx_xvsrari_w(_1, 1); } + // CHECK-LABEL: @xvsrari_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrari_d(v4i64 _1) { return __lasx_xvsrari_d(_1, 1); } + // CHECK-LABEL: @xvsrl_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrl_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrl_b(_1, _2); } + // CHECK-LABEL: @xvsrl_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrl_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrl_h(_1, _2); } + // CHECK-LABEL: @xvsrl_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> 
[[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrl_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrl_w(_1, _2); } + // CHECK-LABEL: @xvsrl_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrl_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrl_d(_1, _2); } + // CHECK-LABEL: @xvsrli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrli_b(v32i8 _1) { return __lasx_xvsrli_b(_1, 1); } + // CHECK-LABEL: @xvsrli_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrli_h(v16i16 _1) { return __lasx_xvsrli_h(_1, 1); } + // CHECK-LABEL: @xvsrli_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrli_w(v8i32 _1) { return __lasx_xvsrli_w(_1, 1); } + // CHECK-LABEL: @xvsrli_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrli_d(v4i64 _1) { return 
__lasx_xvsrli_d(_1, 1); } + // CHECK-LABEL: @xvsrlr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrlr_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrlr_b(_1, _2); } + // CHECK-LABEL: @xvsrlr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrlr_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrlr_h(_1, _2); } + // CHECK-LABEL: @xvsrlr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrlr_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrlr_w(_1, _2); } + // CHECK-LABEL: @xvsrlr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrlr_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrlr_d(_1, _2); } + // CHECK-LABEL: @xvsrlri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrlri_b(v32i8 _1) { return __lasx_xvsrlri_b(_1, 1); } + // CHECK-LABEL: @xvsrlri_h( + // 
CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrlri_h(v16i16 _1) { return __lasx_xvsrlri_h(_1, 1); } + // CHECK-LABEL: @xvsrlri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrlri_w(v8i32 _1) { return __lasx_xvsrlri_w(_1, 1); } + // CHECK-LABEL: @xvsrlri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrlri_d(v4i64 _1) { return __lasx_xvsrlri_d(_1, 1); } + // CHECK-LABEL: @xvbitclr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvbitclr_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitclr_b(_1, _2); } + // CHECK-LABEL: @xvbitclr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvbitclr_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitclr_h(_1, _2); } + // CHECK-LABEL: @xvbitclr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// 
CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvbitclr_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitclr_w(_1, _2); } + // CHECK-LABEL: @xvbitclr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvbitclr_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitclr_d(_1, _2); } + // CHECK-LABEL: @xvbitclri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvbitclri_b(v32u8 _1) { return __lasx_xvbitclri_b(_1, 1); } + // CHECK-LABEL: @xvbitclri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvbitclri_h(v16u16 _1) { return __lasx_xvbitclri_h(_1, 1); } + // CHECK-LABEL: @xvbitclri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvbitclri_w(v8u32 _1) { return __lasx_xvbitclri_w(_1, 1); } + // CHECK-LABEL: @xvbitclri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvbitclri_d(v4u64 _1) { return __lasx_xvbitclri_d(_1, 1); } + 
// CHECK-LABEL: @xvbitset_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvbitset_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitset_b(_1, _2); } + // CHECK-LABEL: @xvbitset_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvbitset_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitset_h(_1, _2); } + // CHECK-LABEL: @xvbitset_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvbitset_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitset_w(_1, _2); } + // CHECK-LABEL: @xvbitset_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvbitset_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitset_d(_1, _2); } + // CHECK-LABEL: @xvbitseti_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvbitseti_b(v32u8 _1) { return __lasx_xvbitseti_b(_1, 1); } + // CHECK-LABEL: 
@xvbitseti_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvbitseti_h(v16u16 _1) { return __lasx_xvbitseti_h(_1, 1); } + // CHECK-LABEL: @xvbitseti_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvbitseti_w(v8u32 _1) { return __lasx_xvbitseti_w(_1, 1); } + // CHECK-LABEL: @xvbitseti_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvbitseti_d(v4u64 _1) { return __lasx_xvbitseti_d(_1, 1); } + // CHECK-LABEL: @xvbitrev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvbitrev_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitrev_b(_1, _2); } + // CHECK-LABEL: @xvbitrev_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvbitrev_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitrev_h(_1, _2); } + // CHECK-LABEL: @xvbitrev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvbitrev_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitrev_w(_1, _2); } + // CHECK-LABEL: @xvbitrev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvbitrev_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitrev_d(_1, _2); } + // CHECK-LABEL: @xvbitrevi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvbitrevi_b(v32u8 _1) { return __lasx_xvbitrevi_b(_1, 1); } + // CHECK-LABEL: @xvbitrevi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvbitrevi_h(v16u16 _1) { return __lasx_xvbitrevi_h(_1, 1); } + // CHECK-LABEL: @xvbitrevi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvbitrevi_w(v8u32 _1) { return __lasx_xvbitrevi_w(_1, 1); } + // CHECK-LABEL: @xvbitrevi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 
xvbitrevi_d(v4u64 _1) { return __lasx_xvbitrevi_d(_1, 1); } + // CHECK-LABEL: @xvadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvadd_b(v32i8 _1, v32i8 _2) { return __lasx_xvadd_b(_1, _2); } + // CHECK-LABEL: @xvadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvadd_h(v16i16 _1, v16i16 _2) { return __lasx_xvadd_h(_1, _2); } + // CHECK-LABEL: @xvadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvadd_w(v8i32 _1, v8i32 _2) { return __lasx_xvadd_w(_1, _2); } + // CHECK-LABEL: @xvadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvadd_d(v4i64 _1, v4i64 _2) { return __lasx_xvadd_d(_1, _2); } + // CHECK-LABEL: @xvaddi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvaddi_bu(v32i8 _1) { return __lasx_xvaddi_bu(_1, 1); } + // CHECK-LABEL: @xvaddi_hu( 
+ // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvaddi_hu(v16i16 _1) { return __lasx_xvaddi_hu(_1, 1); } + // CHECK-LABEL: @xvaddi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvaddi_wu(v8i32 _1) { return __lasx_xvaddi_wu(_1, 1); } + // CHECK-LABEL: @xvaddi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddi_du(v4i64 _1) { return __lasx_xvaddi_du(_1, 1); } + // CHECK-LABEL: @xvsub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsub_b(v32i8 _1, v32i8 _2) { return __lasx_xvsub_b(_1, _2); } + // CHECK-LABEL: @xvsub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsub_h(v16i16 _1, v16i16 _2) { return __lasx_xvsub_h(_1, _2); } + // CHECK-LABEL: @xvsub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x 
i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsub_w(v8i32 _1, v8i32 _2) { return __lasx_xvsub_w(_1, _2); } + // CHECK-LABEL: @xvsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsub_d(v4i64 _1, v4i64 _2) { return __lasx_xvsub_d(_1, _2); } + // CHECK-LABEL: @xvsubi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsubi_bu(v32i8 _1) { return __lasx_xvsubi_bu(_1, 1); } + // CHECK-LABEL: @xvsubi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsubi_hu(v16i16 _1) { return __lasx_xvsubi_hu(_1, 1); } + // CHECK-LABEL: @xvsubi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsubi_wu(v8i32 _1) { return __lasx_xvsubi_wu(_1, 1); } + // CHECK-LABEL: @xvsubi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubi_du(v4i64 _1) { return __lasx_xvsubi_du(_1, 1); } + // CHECK-LABEL: @xvmax_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x 
i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmax_b(v32i8 _1, v32i8 _2) { return __lasx_xvmax_b(_1, _2); } + // CHECK-LABEL: @xvmax_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmax_h(v16i16 _1, v16i16 _2) { return __lasx_xvmax_h(_1, _2); } + // CHECK-LABEL: @xvmax_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmax_w(v8i32 _1, v8i32 _2) { return __lasx_xvmax_w(_1, _2); } + // CHECK-LABEL: @xvmax_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmax_d(v4i64 _1, v4i64 _2) { return __lasx_xvmax_d(_1, _2); } + // CHECK-LABEL: @xvmaxi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmaxi_b(v32i8 _1) { return __lasx_xvmaxi_b(_1, 1); } + // CHECK-LABEL: @xvmaxi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x 
i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmaxi_h(v16i16 _1) { return __lasx_xvmaxi_h(_1, 1); } + // CHECK-LABEL: @xvmaxi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmaxi_w(v8i32 _1) { return __lasx_xvmaxi_w(_1, 1); } + // CHECK-LABEL: @xvmaxi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaxi_d(v4i64 _1) { return __lasx_xvmaxi_d(_1, 1); } + // CHECK-LABEL: @xvmax_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvmax_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmax_bu(_1, _2); } + // CHECK-LABEL: @xvmax_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvmax_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmax_hu(_1, _2); } + // CHECK-LABEL: @xvmax_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> [[_124]], <8 x i32> 
[[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvmax_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmax_wu(_1, _2); } + // CHECK-LABEL: @xvmax_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmax_du(v4u64 _1, v4u64 _2) { return __lasx_xvmax_du(_1, _2); } + // CHECK-LABEL: @xvmaxi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvmaxi_bu(v32u8 _1) { return __lasx_xvmaxi_bu(_1, 1); } + // CHECK-LABEL: @xvmaxi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvmaxi_hu(v16u16 _1) { return __lasx_xvmaxi_hu(_1, 1); } + // CHECK-LABEL: @xvmaxi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvmaxi_wu(v8u32 _1) { return __lasx_xvmaxi_wu(_1, 1); } + // CHECK-LABEL: @xvmaxi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmaxi_du(v4u64 _1) { return __lasx_xvmaxi_du(_1, 1); } + // CHECK-LABEL: @xvmin_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load 
<32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmin_b(v32i8 _1, v32i8 _2) { return __lasx_xvmin_b(_1, _2); } + // CHECK-LABEL: @xvmin_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmin_h(v16i16 _1, v16i16 _2) { return __lasx_xvmin_h(_1, _2); } + // CHECK-LABEL: @xvmin_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmin_w(v8i32 _1, v8i32 _2) { return __lasx_xvmin_w(_1, _2); } + // CHECK-LABEL: @xvmin_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmin_d(v4i64 _1, v4i64 _2) { return __lasx_xvmin_d(_1, _2); } + // CHECK-LABEL: @xvmini_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmini_b(v32i8 _1) { return __lasx_xvmini_b(_1, 1); } + // CHECK-LABEL: @xvmini_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x 
i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmini_h(v16i16 _1) { return __lasx_xvmini_h(_1, 1); } + // CHECK-LABEL: @xvmini_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmini_w(v8i32 _1) { return __lasx_xvmini_w(_1, 1); } + // CHECK-LABEL: @xvmini_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmini_d(v4i64 _1) { return __lasx_xvmini_d(_1, 1); } + // CHECK-LABEL: @xvmin_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvmin_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmin_bu(_1, _2); } + // CHECK-LABEL: @xvmin_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvmin_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmin_hu(_1, _2); } + // CHECK-LABEL: @xvmin_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvmin_wu(v8u32 
_1, v8u32 _2) { return __lasx_xvmin_wu(_1, _2); } + // CHECK-LABEL: @xvmin_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmin_du(v4u64 _1, v4u64 _2) { return __lasx_xvmin_du(_1, _2); } + // CHECK-LABEL: @xvmini_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvmini_bu(v32u8 _1) { return __lasx_xvmini_bu(_1, 1); } + // CHECK-LABEL: @xvmini_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvmini_hu(v16u16 _1) { return __lasx_xvmini_hu(_1, 1); } + // CHECK-LABEL: @xvmini_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvmini_wu(v8u32 _1) { return __lasx_xvmini_wu(_1, 1); } + // CHECK-LABEL: @xvmini_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmini_du(v4u64 _1) { return __lasx_xvmini_du(_1, 1); } + // CHECK-LABEL: @xvseq_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// 
CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvseq_b(v32i8 _1, v32i8 _2) { return __lasx_xvseq_b(_1, _2); } + // CHECK-LABEL: @xvseq_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvseq_h(v16i16 _1, v16i16 _2) { return __lasx_xvseq_h(_1, _2); } + // CHECK-LABEL: @xvseq_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvseq_w(v8i32 _1, v8i32 _2) { return __lasx_xvseq_w(_1, _2); } + // CHECK-LABEL: @xvseq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvseq_d(v4i64 _1, v4i64 _2) { return __lasx_xvseq_d(_1, _2); } + // CHECK-LABEL: @xvseqi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvseqi_b(v32i8 _1) { return __lasx_xvseqi_b(_1, 1); } + // CHECK-LABEL: @xvseqi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] 
++// CHECK-NEXT: ret void + // + v16i16 xvseqi_h(v16i16 _1) { return __lasx_xvseqi_h(_1, 1); } + // CHECK-LABEL: @xvseqi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvseqi_w(v8i32 _1) { return __lasx_xvseqi_w(_1, 1); } + // CHECK-LABEL: @xvseqi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvseqi_d(v4i64 _1) { return __lasx_xvseqi_d(_1, 1); } + // CHECK-LABEL: @xvslt_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvslt_b(v32i8 _1, v32i8 _2) { return __lasx_xvslt_b(_1, _2); } + // CHECK-LABEL: @xvslt_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvslt_h(v16i16 _1, v16i16 _2) { return __lasx_xvslt_h(_1, _2); } + // CHECK-LABEL: @xvslt_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvslt_w(v8i32 _1, v8i32 _2) { return __lasx_xvslt_w(_1, _2); } + // CHECK-LABEL: @xvslt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvslt.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvslt_d(v4i64 _1, v4i64 _2) { return __lasx_xvslt_d(_1, _2); } + // CHECK-LABEL: @xvslti_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvslti_b(v32i8 _1) { return __lasx_xvslti_b(_1, 1); } + // CHECK-LABEL: @xvslti_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvslti_h(v16i16 _1) { return __lasx_xvslti_h(_1, 1); } + // CHECK-LABEL: @xvslti_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvslti_w(v8i32 _1) { return __lasx_xvslti_w(_1, 1); } + // CHECK-LABEL: @xvslti_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvslti_d(v4i64 _1) { return __lasx_xvslti_d(_1, 1); } + // CHECK-LABEL: @xvslt_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], 
align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvslt_bu(v32u8 _1, v32u8 _2) { return __lasx_xvslt_bu(_1, _2); } + // CHECK-LABEL: @xvslt_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvslt_hu(v16u16 _1, v16u16 _2) { return __lasx_xvslt_hu(_1, _2); } + // CHECK-LABEL: @xvslt_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvslt_wu(v8u32 _1, v8u32 _2) { return __lasx_xvslt_wu(_1, _2); } + // CHECK-LABEL: @xvslt_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvslt_du(v4u64 _1, v4u64 _2) { return __lasx_xvslt_du(_1, _2); } + // CHECK-LABEL: @xvslti_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvslti_bu(v32u8 _1) { return __lasx_xvslti_bu(_1, 1); } + // CHECK-LABEL: @xvslti_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvslti_hu(v16u16 _1) { return __lasx_xvslti_hu(_1, 1); } + // CHECK-LABEL: @xvslti_wu( + // CHECK-NEXT: entry: +-// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvslti_wu(v8u32 _1) { return __lasx_xvslti_wu(_1, 1); } + // CHECK-LABEL: @xvslti_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvslti_du(v4u64 _1) { return __lasx_xvslti_du(_1, 1); } + // CHECK-LABEL: @xvsle_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsle_b(v32i8 _1, v32i8 _2) { return __lasx_xvsle_b(_1, _2); } + // CHECK-LABEL: @xvsle_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsle_h(v16i16 _1, v16i16 _2) { return __lasx_xvsle_h(_1, _2); } + // CHECK-LABEL: @xvsle_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsle_w(v8i32 _1, v8i32 _2) { return __lasx_xvsle_w(_1, _2); } + // CHECK-LABEL: @xvsle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, 
ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsle_d(v4i64 _1, v4i64 _2) { return __lasx_xvsle_d(_1, _2); } + // CHECK-LABEL: @xvslei_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvslei_b(v32i8 _1) { return __lasx_xvslei_b(_1, 1); } + // CHECK-LABEL: @xvslei_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvslei_h(v16i16 _1) { return __lasx_xvslei_h(_1, 1); } + // CHECK-LABEL: @xvslei_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvslei_w(v8i32 _1) { return __lasx_xvslei_w(_1, 1); } + // CHECK-LABEL: @xvslei_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvslei_d(v4i64 _1) { return __lasx_xvslei_d(_1, 1); } + // CHECK-LABEL: @xvsle_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsle_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsle_bu(_1, _2); } + // CHECK-LABEL: @xvsle_hu( 
+ // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsle_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsle_hu(_1, _2); } + // CHECK-LABEL: @xvsle_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsle_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsle_wu(_1, _2); } + // CHECK-LABEL: @xvsle_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsle_du(v4u64 _1, v4u64 _2) { return __lasx_xvsle_du(_1, _2); } + // CHECK-LABEL: @xvslei_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvslei_bu(v32u8 _1) { return __lasx_xvslei_bu(_1, 1); } + // CHECK-LABEL: @xvslei_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvslei_hu(v16u16 _1) { return __lasx_xvslei_hu(_1, 1); } + // CHECK-LABEL: @xvslei_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// 
CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvslei_wu(v8u32 _1) { return __lasx_xvslei_wu(_1, 1); } + // CHECK-LABEL: @xvslei_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvslei_du(v4u64 _1) { return __lasx_xvslei_du(_1, 1); } + // CHECK-LABEL: @xvsat_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsat_b(v32i8 _1) { return __lasx_xvsat_b(_1, 1); } + // CHECK-LABEL: @xvsat_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsat_h(v16i16 _1) { return __lasx_xvsat_h(_1, 1); } + // CHECK-LABEL: @xvsat_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsat_w(v8i32 _1) { return __lasx_xvsat_w(_1, 1); } + // CHECK-LABEL: @xvsat_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsat_d(v4i64 _1) { return __lasx_xvsat_d(_1, 1); } + // CHECK-LABEL: @xvsat_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x 
i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvsat_bu(v32u8 _1) { return __lasx_xvsat_bu(_1, 1); } + // CHECK-LABEL: @xvsat_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvsat_hu(v16u16 _1) { return __lasx_xvsat_hu(_1, 1); } + // CHECK-LABEL: @xvsat_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvsat_wu(v8u32 _1) { return __lasx_xvsat_wu(_1, 1); } + // CHECK-LABEL: @xvsat_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvsat_du(v4u64 _1) { return __lasx_xvsat_du(_1, 1); } + // CHECK-LABEL: @xvadda_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvadda_b(v32i8 _1, v32i8 _2) { return __lasx_xvadda_b(_1, _2); } + // CHECK-LABEL: @xvadda_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvadda_h(v16i16 _1, v16i16 _2) { return 
__lasx_xvadda_h(_1, _2); } + // CHECK-LABEL: @xvadda_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvadda_w(v8i32 _1, v8i32 _2) { return __lasx_xvadda_w(_1, _2); } + // CHECK-LABEL: @xvadda_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvadda_d(v4i64 _1, v4i64 _2) { return __lasx_xvadda_d(_1, _2); } + // CHECK-LABEL: @xvsadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsadd_b(v32i8 _1, v32i8 _2) { return __lasx_xvsadd_b(_1, _2); } + // CHECK-LABEL: @xvsadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsadd_h(v16i16 _1, v16i16 _2) { return __lasx_xvsadd_h(_1, _2); } + // CHECK-LABEL: @xvsadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: 
ret void + // + v8i32 xvsadd_w(v8i32 _1, v8i32 _2) { return __lasx_xvsadd_w(_1, _2); } + // CHECK-LABEL: @xvsadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsadd_d(v4i64 _1, v4i64 _2) { return __lasx_xvsadd_d(_1, _2); } + // CHECK-LABEL: @xvsadd_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvsadd_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsadd_bu(_1, _2); } + // CHECK-LABEL: @xvsadd_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvsadd_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsadd_hu(_1, _2); } + // CHECK-LABEL: @xvsadd_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvsadd_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsadd_wu(_1, _2); } + // CHECK-LABEL: @xvsadd_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> 
[[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvsadd_du(v4u64 _1, v4u64 _2) { return __lasx_xvsadd_du(_1, _2); } + // CHECK-LABEL: @xvavg_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvavg_b(v32i8 _1, v32i8 _2) { return __lasx_xvavg_b(_1, _2); } + // CHECK-LABEL: @xvavg_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvavg_h(v16i16 _1, v16i16 _2) { return __lasx_xvavg_h(_1, _2); } + // CHECK-LABEL: @xvavg_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvavg_w(v8i32 _1, v8i32 _2) { return __lasx_xvavg_w(_1, _2); } + // CHECK-LABEL: @xvavg_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvavg_d(v4i64 _1, v4i64 _2) { return __lasx_xvavg_d(_1, _2); } + // CHECK-LABEL: @xvavg_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> [[_124]], <32 x i8> 
[[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvavg_bu(v32u8 _1, v32u8 _2) { return __lasx_xvavg_bu(_1, _2); } + // CHECK-LABEL: @xvavg_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvavg_hu(v16u16 _1, v16u16 _2) { return __lasx_xvavg_hu(_1, _2); } + // CHECK-LABEL: @xvavg_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvavg_wu(v8u32 _1, v8u32 _2) { return __lasx_xvavg_wu(_1, _2); } + // CHECK-LABEL: @xvavg_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvavg_du(v4u64 _1, v4u64 _2) { return __lasx_xvavg_du(_1, _2); } + // CHECK-LABEL: @xvavgr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvavgr_b(v32i8 _1, v32i8 _2) { return __lasx_xvavgr_b(_1, _2); } + // CHECK-LABEL: @xvavgr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 
x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvavgr_h(v16i16 _1, v16i16 _2) { return __lasx_xvavgr_h(_1, _2); } + // CHECK-LABEL: @xvavgr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvavgr_w(v8i32 _1, v8i32 _2) { return __lasx_xvavgr_w(_1, _2); } + // CHECK-LABEL: @xvavgr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvavgr_d(v4i64 _1, v4i64 _2) { return __lasx_xvavgr_d(_1, _2); } + // CHECK-LABEL: @xvavgr_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvavgr_bu(v32u8 _1, v32u8 _2) { return __lasx_xvavgr_bu(_1, _2); } + // CHECK-LABEL: @xvavgr_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvavgr_hu(v16u16 _1, v16u16 _2) { return __lasx_xvavgr_hu(_1, _2); } + // CHECK-LABEL: @xvavgr_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr 
[[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvavgr_wu(v8u32 _1, v8u32 _2) { return __lasx_xvavgr_wu(_1, _2); } + // CHECK-LABEL: @xvavgr_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvavgr_du(v4u64 _1, v4u64 _2) { return __lasx_xvavgr_du(_1, _2); } + // CHECK-LABEL: @xvssub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssub_b(v32i8 _1, v32i8 _2) { return __lasx_xvssub_b(_1, _2); } + // CHECK-LABEL: @xvssub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssub_h(v16i16 _1, v16i16 _2) { return __lasx_xvssub_h(_1, _2); } + // CHECK-LABEL: @xvssub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssub_w(v8i32 _1, v8i32 _2) { return __lasx_xvssub_w(_1, _2); } + // CHECK-LABEL: @xvssub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, 
!tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvssub_d(v4i64 _1, v4i64 _2) { return __lasx_xvssub_d(_1, _2); } + // CHECK-LABEL: @xvssub_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvssub_bu(v32u8 _1, v32u8 _2) { return __lasx_xvssub_bu(_1, _2); } + // CHECK-LABEL: @xvssub_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvssub_hu(v16u16 _1, v16u16 _2) { return __lasx_xvssub_hu(_1, _2); } + // CHECK-LABEL: @xvssub_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssub_wu(v8u32 _1, v8u32 _2) { return __lasx_xvssub_wu(_1, _2); } + // CHECK-LABEL: @xvssub_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvssub_du(v4u64 _1, v4u64 _2) { return __lasx_xvssub_du(_1, _2); } + // CHECK-LABEL: @xvabsd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] 
++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvabsd_b(v32i8 _1, v32i8 _2) { return __lasx_xvabsd_b(_1, _2); } + // CHECK-LABEL: @xvabsd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvabsd_h(v16i16 _1, v16i16 _2) { return __lasx_xvabsd_h(_1, _2); } + // CHECK-LABEL: @xvabsd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvabsd_w(v8i32 _1, v8i32 _2) { return __lasx_xvabsd_w(_1, _2); } + // CHECK-LABEL: @xvabsd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvabsd_d(v4i64 _1, v4i64 _2) { return __lasx_xvabsd_d(_1, _2); } + // CHECK-LABEL: @xvabsd_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvabsd_bu(v32u8 _1, v32u8 _2) { return __lasx_xvabsd_bu(_1, _2); } + // CHECK-LABEL: @xvabsd_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> 
[[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvabsd_hu(v16u16 _1, v16u16 _2) { return __lasx_xvabsd_hu(_1, _2); } + // CHECK-LABEL: @xvabsd_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvabsd_wu(v8u32 _1, v8u32 _2) { return __lasx_xvabsd_wu(_1, _2); } + // CHECK-LABEL: @xvabsd_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvabsd_du(v4u64 _1, v4u64 _2) { return __lasx_xvabsd_du(_1, _2); } + // CHECK-LABEL: @xvmul_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmul_b(v32i8 _1, v32i8 _2) { return __lasx_xvmul_b(_1, _2); } + // CHECK-LABEL: @xvmul_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmul_h(v16i16 _1, v16i16 _2) { return __lasx_xvmul_h(_1, _2); } + // CHECK-LABEL: @xvmul_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmul_w(v8i32 _1, v8i32 _2) { return __lasx_xvmul_w(_1, _2); } + // CHECK-LABEL: @xvmul_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmul_d(v4i64 _1, v4i64 _2) { return __lasx_xvmul_d(_1, _2); } + // CHECK-LABEL: @xvmadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) ++// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmadd_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmadd_b(_1, _2, _3); } + // CHECK-LABEL: @xvmadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmadd_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmadd_h(_1, _2, _3); } + // CHECK-LABEL: @xvmadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// 
CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> [[_136]], <8 x i32> [[_247]], <8 x i32> [[_358]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmadd_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmadd_w(_1, _2, _3); } + // CHECK-LABEL: @xvmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmadd_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmadd_d(_1, _2, _3); } + // CHECK-LABEL: @xvmsub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) ++// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmsub_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmsub_b(_1, _2, _3); } + // CHECK-LABEL: @xvmsub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmsub_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmsub_h(_1, _2, _3); } + // CHECK-LABEL: @xvmsub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 
32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> [[_136]], <8 x i32> [[_247]], <8 x i32> [[_358]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmsub_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmsub_w(_1, _2, _3); } + // CHECK-LABEL: @xvmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmsub_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmsub_d(_1, _2, _3); } + // CHECK-LABEL: @xvdiv_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvdiv_b(v32i8 _1, v32i8 _2) { return __lasx_xvdiv_b(_1, _2); } + // CHECK-LABEL: @xvdiv_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvdiv_h(v16i16 _1, v16i16 _2) { return __lasx_xvdiv_h(_1, _2); } + // CHECK-LABEL: @xvdiv_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvdiv_w(v8i32 _1, v8i32 _2) { return __lasx_xvdiv_w(_1, _2); } + // CHECK-LABEL: @xvdiv_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> [[_1:%.*]], <4 x 
i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvdiv_d(v4i64 _1, v4i64 _2) { return __lasx_xvdiv_d(_1, _2); } + // CHECK-LABEL: @xvdiv_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvdiv_bu(v32u8 _1, v32u8 _2) { return __lasx_xvdiv_bu(_1, _2); } + // CHECK-LABEL: @xvdiv_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvdiv_hu(v16u16 _1, v16u16 _2) { return __lasx_xvdiv_hu(_1, _2); } + // CHECK-LABEL: @xvdiv_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvdiv_wu(v8u32 _1, v8u32 _2) { return __lasx_xvdiv_wu(_1, _2); } + // CHECK-LABEL: @xvdiv_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvdiv_du(v4u64 _1, v4u64 _2) { return __lasx_xvdiv_du(_1, _2); } + // CHECK-LABEL: @xvhaddw_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> 
@llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvhaddw_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvhaddw_h_b(_1, _2); } + // CHECK-LABEL: @xvhaddw_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvhaddw_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvhaddw_w_h(_1, _2); } + // CHECK-LABEL: @xvhaddw_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvhaddw_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvhaddw_d_w(_1, _2); } + // CHECK-LABEL: @xvhaddw_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvhaddw_hu_bu(v32u8 _1, v32u8 _2) { return __lasx_xvhaddw_hu_bu(_1, _2); } + // CHECK-LABEL: @xvhaddw_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 
xvhaddw_wu_hu(v16u16 _1, v16u16 _2) { return __lasx_xvhaddw_wu_hu(_1, _2); } + // CHECK-LABEL: @xvhaddw_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvhaddw_du_wu(v8u32 _1, v8u32 _2) { return __lasx_xvhaddw_du_wu(_1, _2); } + // CHECK-LABEL: @xvhsubw_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvhsubw_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvhsubw_h_b(_1, _2); } + // CHECK-LABEL: @xvhsubw_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvhsubw_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvhsubw_w_h(_1, _2); } + // CHECK-LABEL: @xvhsubw_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvhsubw_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvhsubw_d_w(_1, _2); } + // CHECK-LABEL: @xvhsubw_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 
x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvhsubw_hu_bu(v32u8 _1, v32u8 _2) { return __lasx_xvhsubw_hu_bu(_1, _2); } + // CHECK-LABEL: @xvhsubw_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvhsubw_wu_hu(v16u16 _1, v16u16 _2) { return __lasx_xvhsubw_wu_hu(_1, _2); } + // CHECK-LABEL: @xvhsubw_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvhsubw_du_wu(v8u32 _1, v8u32 _2) { return __lasx_xvhsubw_du_wu(_1, _2); } + // CHECK-LABEL: @xvmod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmod_b(v32i8 _1, v32i8 _2) { return __lasx_xvmod_b(_1, _2); } + // CHECK-LABEL: @xvmod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmod_h(v16i16 _1, v16i16 _2) { return __lasx_xvmod_h(_1, _2); } + // CHECK-LABEL: @xvmod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr 
[[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmod_w(v8i32 _1, v8i32 _2) { return __lasx_xvmod_w(_1, _2); } + // CHECK-LABEL: @xvmod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmod_d(v4i64 _1, v4i64 _2) { return __lasx_xvmod_d(_1, _2); } + // CHECK-LABEL: @xvmod_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvmod_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmod_bu(_1, _2); } + // CHECK-LABEL: @xvmod_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvmod_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmod_hu(_1, _2); } + // CHECK-LABEL: @xvmod_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvmod_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmod_wu(_1, _2); } + // CHECK-LABEL: @xvmod_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] 
++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmod_du(v4u64 _1, v4u64 _2) { return __lasx_xvmod_du(_1, _2); } + // CHECK-LABEL: @xvrepl128vei_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvrepl128vei_b(v32i8 _1) { return __lasx_xvrepl128vei_b(_1, 1); } + // CHECK-LABEL: @xvrepl128vei_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvrepl128vei_h(v16i16 _1) { return __lasx_xvrepl128vei_h(_1, 1); } + // CHECK-LABEL: @xvrepl128vei_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvrepl128vei_w(v8i32 _1) { return __lasx_xvrepl128vei_w(_1, 1); } + // CHECK-LABEL: @xvrepl128vei_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvrepl128vei_d(v4i64 _1) { return __lasx_xvrepl128vei_d(_1, 1); } + // CHECK-LABEL: @xvpickev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 
xvpickev_b(v32i8 _1, v32i8 _2) { return __lasx_xvpickev_b(_1, _2); } + // CHECK-LABEL: @xvpickev_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvpickev_h(v16i16 _1, v16i16 _2) { return __lasx_xvpickev_h(_1, _2); } + // CHECK-LABEL: @xvpickev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvpickev_w(v8i32 _1, v8i32 _2) { return __lasx_xvpickev_w(_1, _2); } + // CHECK-LABEL: @xvpickev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvpickev_d(v4i64 _1, v4i64 _2) { return __lasx_xvpickev_d(_1, _2); } + // CHECK-LABEL: @xvpickod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvpickod_b(v32i8 _1, v32i8 _2) { return __lasx_xvpickod_b(_1, _2); } + // CHECK-LABEL: @xvpickod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// 
CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvpickod_h(v16i16 _1, v16i16 _2) { return __lasx_xvpickod_h(_1, _2); } + // CHECK-LABEL: @xvpickod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvpickod_w(v8i32 _1, v8i32 _2) { return __lasx_xvpickod_w(_1, _2); } + // CHECK-LABEL: @xvpickod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvpickod_d(v4i64 _1, v4i64 _2) { return __lasx_xvpickod_d(_1, _2); } + // CHECK-LABEL: @xvilvh_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvilvh_b(v32i8 _1, v32i8 _2) { return __lasx_xvilvh_b(_1, _2); } + // CHECK-LABEL: @xvilvh_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvilvh_h(v16i16 _1, v16i16 _2) { return __lasx_xvilvh_h(_1, _2); } + // CHECK-LABEL: @xvilvh_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail 
call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvilvh_w(v8i32 _1, v8i32 _2) { return __lasx_xvilvh_w(_1, _2); } + // CHECK-LABEL: @xvilvh_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvilvh_d(v4i64 _1, v4i64 _2) { return __lasx_xvilvh_d(_1, _2); } + // CHECK-LABEL: @xvilvl_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvilvl_b(v32i8 _1, v32i8 _2) { return __lasx_xvilvl_b(_1, _2); } + // CHECK-LABEL: @xvilvl_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvilvl_h(v16i16 _1, v16i16 _2) { return __lasx_xvilvl_h(_1, _2); } + // CHECK-LABEL: @xvilvl_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvilvl_w(v8i32 _1, v8i32 _2) { return __lasx_xvilvl_w(_1, _2); } + // CHECK-LABEL: @xvilvl_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 
32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvilvl_d(v4i64 _1, v4i64 _2) { return __lasx_xvilvl_d(_1, _2); } + // CHECK-LABEL: @xvpackev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvpackev_b(v32i8 _1, v32i8 _2) { return __lasx_xvpackev_b(_1, _2); } + // CHECK-LABEL: @xvpackev_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvpackev_h(v16i16 _1, v16i16 _2) { return __lasx_xvpackev_h(_1, _2); } + // CHECK-LABEL: @xvpackev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvpackev_w(v8i32 _1, v8i32 _2) { return __lasx_xvpackev_w(_1, _2); } + // CHECK-LABEL: @xvpackev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvpackev_d(v4i64 _1, v4i64 _2) { return __lasx_xvpackev_d(_1, _2); } + // CHECK-LABEL: @xvpackod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvpackod_b(v32i8 _1, v32i8 _2) { return __lasx_xvpackod_b(_1, _2); } + // CHECK-LABEL: @xvpackod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvpackod_h(v16i16 _1, v16i16 _2) { return __lasx_xvpackod_h(_1, _2); } + // CHECK-LABEL: @xvpackod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvpackod_w(v8i32 _1, v8i32 _2) { return __lasx_xvpackod_w(_1, _2); } + // CHECK-LABEL: @xvpackod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvpackod_d(v4i64 _1, v4i64 _2) { return __lasx_xvpackod_d(_1, _2); } + // CHECK-LABEL: @xvshuf_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) ++// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvshuf_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvshuf_b(_1, _2, _3); } + // CHECK-LABEL: 
@xvshuf_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvshuf_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvshuf_h(_1, _2, _3); } + // CHECK-LABEL: @xvshuf_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> [[_136]], <8 x i32> [[_247]], <8 x i32> [[_358]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvshuf_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvshuf_w(_1, _2, _3); } + // CHECK-LABEL: @xvshuf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvshuf_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvshuf_d(_1, _2, _3); } + // CHECK-LABEL: @xvand_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvand_v(v32u8 _1, v32u8 _2) { return __lasx_xvand_v(_1, _2); } + // CHECK-LABEL: @xvandi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load 
<32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvandi_b(v32u8 _1) { return __lasx_xvandi_b(_1, 1); } + // CHECK-LABEL: @xvor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvor_v(v32u8 _1, v32u8 _2) { return __lasx_xvor_v(_1, _2); } + // CHECK-LABEL: @xvori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvori_b(v32u8 _1) { return __lasx_xvori_b(_1, 1); } + // CHECK-LABEL: @xvnor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvnor_v(v32u8 _1, v32u8 _2) { return __lasx_xvnor_v(_1, _2); } + // CHECK-LABEL: @xvnori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvnori_b(v32u8 _1) { return __lasx_xvnori_b(_1, 1); } + // CHECK-LABEL: @xvxor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa 
[[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvxor_v(v32u8 _1, v32u8 _2) { return __lasx_xvxor_v(_1, _2); } + // CHECK-LABEL: @xvxori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvxori_b(v32u8 _1) { return __lasx_xvxori_b(_1, 1); } + // CHECK-LABEL: @xvbitsel_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) ++// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvbitsel_v(v32u8 _1, v32u8 _2, v32u8 _3) { return __lasx_xvbitsel_v(_1, _2, _3); } + // CHECK-LABEL: @xvbitseli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvbitseli_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitseli_b(_1, _2, 1); } + // CHECK-LABEL: @xvshuf4i_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvshuf4i_b(v32i8 _1) { return __lasx_xvshuf4i_b(_1, 1); } + // CHECK-LABEL: @xvshuf4i_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvshuf4i_h(v16i16 _1) { return __lasx_xvshuf4i_h(_1, 1); } + // CHECK-LABEL: 
@xvshuf4i_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvshuf4i_w(v8i32 _1) { return __lasx_xvshuf4i_w(_1, 1); } + // CHECK-LABEL: @xvreplgr2vr_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvreplgr2vr_b(int _1) { return __lasx_xvreplgr2vr_b(_1); } + // CHECK-LABEL: @xvreplgr2vr_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvreplgr2vr_h(int _1) { return __lasx_xvreplgr2vr_h(_1); } + // CHECK-LABEL: @xvreplgr2vr_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvreplgr2vr_w(int _1) { return __lasx_xvreplgr2vr_w(_1); } + // CHECK-LABEL: @xvreplgr2vr_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[CONV:%.*]] = sext i32 [[_1:%.*]] to i64 + // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 [[CONV]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvreplgr2vr_d(int _1) { return __lasx_xvreplgr2vr_d(_1); } + // CHECK-LABEL: @xvpcnt_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> [[_112]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvpcnt_b(v32i8 _1) { return __lasx_xvpcnt_b(_1); } + // CHECK-LABEL: @xvpcnt_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> [[_112]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvpcnt_h(v16i16 _1) { return __lasx_xvpcnt_h(_1); } + // CHECK-LABEL: @xvpcnt_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> 
[[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> [[_112]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvpcnt_w(v8i32 _1) { return __lasx_xvpcnt_w(_1); } + // CHECK-LABEL: @xvpcnt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvpcnt_d(v4i64 _1) { return __lasx_xvpcnt_d(_1); } + // CHECK-LABEL: @xvclo_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> [[_112]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvclo_b(v32i8 _1) { return __lasx_xvclo_b(_1); } + // CHECK-LABEL: @xvclo_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> [[_112]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvclo_h(v16i16 _1) { return __lasx_xvclo_h(_1); } + // CHECK-LABEL: @xvclo_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> [[_112]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvclo_w(v8i32 _1) { return __lasx_xvclo_w(_1); } + // CHECK-LABEL: @xvclo_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvclo_d(v4i64 _1) { return __lasx_xvclo_d(_1); } + // CHECK-LABEL: @xvclz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] 
= tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> [[_112]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvclz_b(v32i8 _1) { return __lasx_xvclz_b(_1); } + // CHECK-LABEL: @xvclz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> [[_112]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvclz_h(v16i16 _1) { return __lasx_xvclz_h(_1); } + // CHECK-LABEL: @xvclz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> [[_112]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvclz_w(v8i32 _1) { return __lasx_xvclz_w(_1); } + // CHECK-LABEL: @xvclz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvclz_d(v4i64 _1) { return __lasx_xvclz_d(_1); } + // CHECK-LABEL: @xvfadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfadd_s(v8f32 _1, v8f32 _2) { return __lasx_xvfadd_s(_1, _2); } + // CHECK-LABEL: @xvfadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfadd_d(v4f64 _1, v4f64 _2) { return __lasx_xvfadd_d(_1, _2); } + // CHECK-LABEL: @xvfsub_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> 
@llvm.loongarch.lasx.xvfsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfsub_s(v8f32 _1, v8f32 _2) { return __lasx_xvfsub_s(_1, _2); } + // CHECK-LABEL: @xvfsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfsub_d(v4f64 _1, v4f64 _2) { return __lasx_xvfsub_d(_1, _2); } + // CHECK-LABEL: @xvfmul_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfmul_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmul_s(_1, _2); } + // CHECK-LABEL: @xvfmul_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfmul_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmul_d(_1, _2); } + // CHECK-LABEL: @xvfdiv_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfdiv_s(v8f32 _1, v8f32 _2) { 
return __lasx_xvfdiv_s(_1, _2); } + // CHECK-LABEL: @xvfdiv_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfdiv_d(v4f64 _1, v4f64 _2) { return __lasx_xvfdiv_d(_1, _2); } + // CHECK-LABEL: @xvfcvt_h_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvfcvt_h_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcvt_h_s(_1, _2); } + // CHECK-LABEL: @xvfcvt_s_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfcvt_s_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcvt_s_d(_1, _2); } + // CHECK-LABEL: @xvfmin_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfmin_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmin_s(_1, _2); } + // CHECK-LABEL: @xvfmin_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// 
CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfmin_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmin_d(_1, _2); } + // CHECK-LABEL: @xvfmina_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfmina_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmina_s(_1, _2); } + // CHECK-LABEL: @xvfmina_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfmina_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmina_d(_1, _2); } + // CHECK-LABEL: @xvfmax_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfmax_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmax_s(_1, _2); } + // CHECK-LABEL: @xvfmax_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfmax_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmax_d(_1, _2); } + // CHECK-LABEL: @xvfmaxa_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 
32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfmaxa_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmaxa_s(_1, _2); } + // CHECK-LABEL: @xvfmaxa_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfmaxa_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmaxa_d(_1, _2); } + // CHECK-LABEL: @xvfclass_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfclass_s(v8f32 _1) { return __lasx_xvfclass_s(_1); } + // CHECK-LABEL: @xvfclass_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfclass_d(v4f64 _1) { return __lasx_xvfclass_d(_1); } + // CHECK-LABEL: @xvfsqrt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfsqrt_s(v8f32 _1) { return __lasx_xvfsqrt_s(_1); } + // CHECK-LABEL: @xvfsqrt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfsqrt_d(v4f64 _1) { return __lasx_xvfsqrt_d(_1); } + // CHECK-LABEL: @xvfrecip_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 
x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfrecip_s(v8f32 _1) { return __lasx_xvfrecip_s(_1); } + // CHECK-LABEL: @xvfrecip_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfrecip_d(v4f64 _1) { return __lasx_xvfrecip_d(_1); } + // CHECK-LABEL: @xvfrint_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfrint_s(v8f32 _1) { return __lasx_xvfrint_s(_1); } + // CHECK-LABEL: @xvfrint_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfrint_d(v4f64 _1) { return __lasx_xvfrint_d(_1); } + // CHECK-LABEL: @xvfrsqrt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfrsqrt_s(v8f32 _1) { return __lasx_xvfrsqrt_s(_1); } + // CHECK-LABEL: @xvfrsqrt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfrsqrt_d(v4f64 _1) { return __lasx_xvfrsqrt_d(_1); } + // CHECK-LABEL: @xvflogb_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvflogb_s(v8f32 _1) { return __lasx_xvflogb_s(_1); } + // CHECK-LABEL: @xvflogb_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvflogb_d(v4f64 _1) { return __lasx_xvflogb_d(_1); } + // CHECK-LABEL: @xvfcvth_s_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> [[_112]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfcvth_s_h(v16i16 _1) { return __lasx_xvfcvth_s_h(_1); } + // CHECK-LABEL: @xvfcvth_d_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfcvth_d_s(v8f32 _1) { return __lasx_xvfcvth_d_s(_1); } + // CHECK-LABEL: @xvfcvtl_s_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> [[_112]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfcvtl_s_h(v16i16 _1) { return __lasx_xvfcvtl_s_h(_1); } + // CHECK-LABEL: @xvfcvtl_d_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfcvtl_d_s(v8f32 _1) { return __lasx_xvfcvtl_d_s(_1); } + // CHECK-LABEL: @xvftint_w_s( 
+ // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftint_w_s(v8f32 _1) { return __lasx_xvftint_w_s(_1); } + // CHECK-LABEL: @xvftint_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftint_l_d(v4f64 _1) { return __lasx_xvftint_l_d(_1); } + // CHECK-LABEL: @xvftint_wu_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvftint_wu_s(v8f32 _1) { return __lasx_xvftint_wu_s(_1); } + // CHECK-LABEL: @xvftint_lu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvftint_lu_d(v4f64 _1) { return __lasx_xvftint_lu_d(_1); } + // CHECK-LABEL: @xvftintrz_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftintrz_w_s(v8f32 _1) { return __lasx_xvftintrz_w_s(_1); } + // CHECK-LABEL: @xvftintrz_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrz_l_d(v4f64 _1) { return __lasx_xvftintrz_l_d(_1); } + 
// CHECK-LABEL: @xvftintrz_wu_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvftintrz_wu_s(v8f32 _1) { return __lasx_xvftintrz_wu_s(_1); } + // CHECK-LABEL: @xvftintrz_lu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvftintrz_lu_d(v4f64 _1) { return __lasx_xvftintrz_lu_d(_1); } + // CHECK-LABEL: @xvffint_s_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> [[_112]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvffint_s_w(v8i32 _1) { return __lasx_xvffint_s_w(_1); } + // CHECK-LABEL: @xvffint_d_l( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvffint_d_l(v4i64 _1) { return __lasx_xvffint_d_l(_1); } + // CHECK-LABEL: @xvffint_s_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> [[_112]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvffint_s_wu(v8u32 _1) { return __lasx_xvffint_s_wu(_1); } + // CHECK-LABEL: @xvffint_d_lu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 
xvffint_d_lu(v4u64 _1) { return __lasx_xvffint_d_lu(_1); } + // CHECK-LABEL: @xvreplve_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_1:%.*]], i32 [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_112]], i32 [[_2:%.*]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvreplve_b(v32i8 _1, int _2) { return __lasx_xvreplve_b(_1, _2); } + // CHECK-LABEL: @xvreplve_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_1:%.*]], i32 [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_112]], i32 [[_2:%.*]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvreplve_h(v16i16 _1, int _2) { return __lasx_xvreplve_h(_1, _2); } + // CHECK-LABEL: @xvreplve_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_1:%.*]], i32 [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_112]], i32 [[_2:%.*]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvreplve_w(v8i32 _1, int _2) { return __lasx_xvreplve_w(_1, _2); } + // CHECK-LABEL: @xvreplve_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1:%.*]], i32 [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1]], i32 [[_2:%.*]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvreplve_d(v4i64 _1, int _2) { return __lasx_xvreplve_d(_1, _2); } + // CHECK-LABEL: @xvpermi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvpermi_w(v8i32 _1, v8i32 _2) { return __lasx_xvpermi_w(_1, _2, 1); } + // CHECK-LABEL: @xvandn_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: 
[[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvandn_v(v32u8 _1, v32u8 _2) { return __lasx_xvandn_v(_1, _2); } + // CHECK-LABEL: @xvneg_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> [[_112]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvneg_b(v32i8 _1) { return __lasx_xvneg_b(_1); } + // CHECK-LABEL: @xvneg_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> [[_112]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvneg_h(v16i16 _1) { return __lasx_xvneg_h(_1); } + // CHECK-LABEL: @xvneg_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> [[_112]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvneg_w(v8i32 _1) { return __lasx_xvneg_w(_1); } + // CHECK-LABEL: @xvneg_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvneg_d(v4i64 _1) { return __lasx_xvneg_d(_1); } + // CHECK-LABEL: @xvmuh_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmuh_b(v32i8 _1, v32i8 _2) { return __lasx_xvmuh_b(_1, _2); } + // CHECK-LABEL: @xvmuh_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmuh_h(v16i16 _1, v16i16 _2) { return __lasx_xvmuh_h(_1, _2); } + // CHECK-LABEL: @xvmuh_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmuh_w(v8i32 _1, v8i32 _2) { return __lasx_xvmuh_w(_1, _2); } + // CHECK-LABEL: @xvmuh_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmuh_d(v4i64 _1, v4i64 _2) { return __lasx_xvmuh_d(_1, _2); } + // CHECK-LABEL: @xvmuh_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvmuh_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmuh_bu(_1, _2); } + // CHECK-LABEL: @xvmuh_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvmuh_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmuh_hu(_1, _2); } + // 
CHECK-LABEL: @xvmuh_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvmuh_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmuh_wu(_1, _2); } + // CHECK-LABEL: @xvmuh_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmuh_du(v4u64 _1, v4u64 _2) { return __lasx_xvmuh_du(_1, _2); } + // CHECK-LABEL: @xvsllwil_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsllwil_h_b(v32i8 _1) { return __lasx_xvsllwil_h_b(_1, 1); } + // CHECK-LABEL: @xvsllwil_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsllwil_w_h(v16i16 _1) { return __lasx_xvsllwil_w_h(_1, 1); } + // CHECK-LABEL: @xvsllwil_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsllwil_d_w(v8i32 _1) { return __lasx_xvsllwil_d_w(_1, 1); } + // CHECK-LABEL: @xvsllwil_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, 
!tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvsllwil_hu_bu(v32u8 _1) { return __lasx_xvsllwil_hu_bu(_1, 1); } + // CHECK-LABEL: @xvsllwil_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvsllwil_wu_hu(v16u16 _1) { return __lasx_xvsllwil_wu_hu(_1, 1); } + // CHECK-LABEL: @xvsllwil_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvsllwil_du_wu(v8u32 _1) { return __lasx_xvsllwil_du_wu(_1, 1); } + // CHECK-LABEL: @xvsran_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsran_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsran_b_h(_1, _2); } + // CHECK-LABEL: @xvsran_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsran_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsran_h_w(_1, _2); } + // CHECK-LABEL: @xvsran_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> [[_1]], <4 x 
i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsran_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsran_w_d(_1, _2); } + // CHECK-LABEL: @xvssran_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssran_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssran_b_h(_1, _2); } + // CHECK-LABEL: @xvssran_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssran_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssran_h_w(_1, _2); } + // CHECK-LABEL: @xvssran_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssran_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssran_w_d(_1, _2); } + // CHECK-LABEL: @xvssran_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvssran_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssran_bu_h(_1, _2); } + // CHECK-LABEL: @xvssran_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr 
[[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvssran_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssran_hu_w(_1, _2); } + // CHECK-LABEL: @xvssran_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssran_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssran_wu_d(_1, _2); } + // CHECK-LABEL: @xvsrarn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrarn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrarn_b_h(_1, _2); } + // CHECK-LABEL: @xvsrarn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrarn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrarn_h_w(_1, _2); } + // CHECK-LABEL: @xvsrarn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrarn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrarn_w_d(_1, _2); } + // CHECK-LABEL: @xvssrarn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] 
++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssrarn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrarn_b_h(_1, _2); } + // CHECK-LABEL: @xvssrarn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssrarn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrarn_h_w(_1, _2); } + // CHECK-LABEL: @xvssrarn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssrarn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrarn_w_d(_1, _2); } + // CHECK-LABEL: @xvssrarn_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvssrarn_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrarn_bu_h(_1, _2); } + // CHECK-LABEL: @xvssrarn_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvssrarn_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrarn_hu_w(_1, _2); } + // CHECK-LABEL: @xvssrarn_wu_d( + // 
CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssrarn_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrarn_wu_d(_1, _2); } + // CHECK-LABEL: @xvsrln_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrln_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrln_b_h(_1, _2); } + // CHECK-LABEL: @xvsrln_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrln_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrln_h_w(_1, _2); } + // CHECK-LABEL: @xvsrln_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrln_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrln_w_d(_1, _2); } + // CHECK-LABEL: @xvssrln_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: 
ret void + // + v32u8 xvssrln_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrln_bu_h(_1, _2); } + // CHECK-LABEL: @xvssrln_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvssrln_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrln_hu_w(_1, _2); } + // CHECK-LABEL: @xvssrln_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssrln_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrln_wu_d(_1, _2); } + // CHECK-LABEL: @xvsrlrn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrlrn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrlrn_b_h(_1, _2); } + // CHECK-LABEL: @xvsrlrn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrlrn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrlrn_h_w(_1, _2); } + // CHECK-LABEL: @xvsrlrn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrlrn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrlrn_w_d(_1, _2); } + // CHECK-LABEL: @xvssrlrn_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvssrlrn_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrlrn_bu_h(_1, _2); } + // CHECK-LABEL: @xvssrlrn_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvssrlrn_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrlrn_hu_w(_1, _2); } + // CHECK-LABEL: @xvssrlrn_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssrlrn_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrlrn_wu_d(_1, _2); } + // CHECK-LABEL: @xvfrstpi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2) { return __lasx_xvfrstpi_b(_1, _2, 1); } + // CHECK-LABEL: @xvfrstpi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2) { return __lasx_xvfrstpi_h(_1, _2, 1); } + // CHECK-LABEL: @xvfrstp_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) ++// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvfrstp_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvfrstp_b(_1, _2, _3); } + // CHECK-LABEL: @xvfrstp_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvfrstp_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvfrstp_h(_1, _2, _3); } + // CHECK-LABEL: @xvshuf4i_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2) { return __lasx_xvshuf4i_d(_1, _2, 1); } + // CHECK-LABEL: @xvbsrl_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvbsrl_v(v32i8 _1) { return 
__lasx_xvbsrl_v(_1, 1); } + // CHECK-LABEL: @xvbsll_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvbsll_v(v32i8 _1) { return __lasx_xvbsll_v(_1, 1); } + // CHECK-LABEL: @xvextrins_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvextrins_b(v32i8 _1, v32i8 _2) { return __lasx_xvextrins_b(_1, _2, 1); } + // CHECK-LABEL: @xvextrins_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvextrins_h(v16i16 _1, v16i16 _2) { return __lasx_xvextrins_h(_1, _2, 1); } + // CHECK-LABEL: @xvextrins_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvextrins_w(v8i32 _1, v8i32 _2) { return __lasx_xvextrins_w(_1, _2, 1); } + // CHECK-LABEL: @xvextrins_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvextrins_d(v4i64 
_1, v4i64 _2) { return __lasx_xvextrins_d(_1, _2, 1); } + // CHECK-LABEL: @xvmskltz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> [[_112]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmskltz_b(v32i8 _1) { return __lasx_xvmskltz_b(_1); } + // CHECK-LABEL: @xvmskltz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> [[_112]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmskltz_h(v16i16 _1) { return __lasx_xvmskltz_h(_1); } + // CHECK-LABEL: @xvmskltz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> [[_112]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmskltz_w(v8i32 _1) { return __lasx_xvmskltz_w(_1); } + // CHECK-LABEL: @xvmskltz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmskltz_d(v4i64 _1) { return __lasx_xvmskltz_d(_1); } + // CHECK-LABEL: @xvsigncov_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsigncov_b(v32i8 _1, v32i8 _2) { return __lasx_xvsigncov_b(_1, _2); } + // CHECK-LABEL: @xvsigncov_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// 
CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsigncov_h(v16i16 _1, v16i16 _2) { return __lasx_xvsigncov_h(_1, _2); } + // CHECK-LABEL: @xvsigncov_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsigncov_w(v8i32 _1, v8i32 _2) { return __lasx_xvsigncov_w(_1, _2); } + // CHECK-LABEL: @xvsigncov_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsigncov_d(v4i64 _1, v4i64 _2) { return __lasx_xvsigncov_d(_1, _2); } + // CHECK-LABEL: @xvfmadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) ++// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfmadd_s(_1, _2, _3); } + // CHECK-LABEL: @xvfmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) ++// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return 
__lasx_xvfmadd_d(_1, _2, _3); } + // CHECK-LABEL: @xvfmsub_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) ++// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfmsub_s(_1, _2, _3); } + // CHECK-LABEL: @xvfmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) ++// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfmsub_d(_1, _2, _3); } + // CHECK-LABEL: @xvfnmadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) ++// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfnmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfnmadd_s(_1, _2, _3); } + // CHECK-LABEL: @xvfnmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) ++// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 
xvfnmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfnmadd_d(_1, _2, _3); } + // CHECK-LABEL: @xvfnmsub_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) ++// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfnmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfnmsub_s(_1, _2, _3); } + // CHECK-LABEL: @xvfnmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) ++// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfnmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfnmsub_d(_1, _2, _3); } + // CHECK-LABEL: @xvftintrne_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftintrne_w_s(v8f32 _1) { return __lasx_xvftintrne_w_s(_1); } + // CHECK-LABEL: @xvftintrne_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrne_l_d(v4f64 _1) { return __lasx_xvftintrne_l_d(_1); } + // CHECK-LABEL: @xvftintrp_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr 
[[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftintrp_w_s(v8f32 _1) { return __lasx_xvftintrp_w_s(_1); } + // CHECK-LABEL: @xvftintrp_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrp_l_d(v4f64 _1) { return __lasx_xvftintrp_l_d(_1); } + // CHECK-LABEL: @xvftintrm_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftintrm_w_s(v8f32 _1) { return __lasx_xvftintrm_w_s(_1); } + // CHECK-LABEL: @xvftintrm_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrm_l_d(v4f64 _1) { return __lasx_xvftintrm_l_d(_1); } + // CHECK-LABEL: @xvftint_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftint_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftint_w_d(_1, _2); } + // CHECK-LABEL: @xvffint_s_l( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvffint_s_l(v4i64 _1, v4i64 _2) { return __lasx_xvffint_s_l(_1, _2); } + // CHECK-LABEL: @xvftintrz_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftintrz_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrz_w_d(_1, _2); } + // CHECK-LABEL: @xvftintrp_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftintrp_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrp_w_d(_1, _2); } + // CHECK-LABEL: @xvftintrm_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftintrm_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrm_w_d(_1, _2); } + // CHECK-LABEL: @xvftintrne_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftintrne_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrne_w_d(_1, _2); } + // CHECK-LABEL: @xvftinth_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftinth_l_s(v8f32 _1) { return __lasx_xvftinth_l_s(_1); } + // CHECK-LABEL: @xvftintl_l_s( + 
// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintl_l_s(v8f32 _1) { return __lasx_xvftintl_l_s(_1); } + // CHECK-LABEL: @xvffinth_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> [[_112]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvffinth_d_w(v8i32 _1) { return __lasx_xvffinth_d_w(_1); } + // CHECK-LABEL: @xvffintl_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> [[_112]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvffintl_d_w(v8i32 _1) { return __lasx_xvffintl_d_w(_1); } + // CHECK-LABEL: @xvftintrzh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrzh_l_s(v8f32 _1) { return __lasx_xvftintrzh_l_s(_1); } + // CHECK-LABEL: @xvftintrzl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrzl_l_s(v8f32 _1) { return __lasx_xvftintrzl_l_s(_1); } + // CHECK-LABEL: @xvftintrph_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrph_l_s(v8f32 _1) { 
return __lasx_xvftintrph_l_s(_1); } + // CHECK-LABEL: @xvftintrpl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrpl_l_s(v8f32 _1) { return __lasx_xvftintrpl_l_s(_1); } + // CHECK-LABEL: @xvftintrmh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrmh_l_s(v8f32 _1) { return __lasx_xvftintrmh_l_s(_1); } + // CHECK-LABEL: @xvftintrml_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrml_l_s(v8f32 _1) { return __lasx_xvftintrml_l_s(_1); } + // CHECK-LABEL: @xvftintrneh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrneh_l_s(v8f32 _1) { return __lasx_xvftintrneh_l_s(_1); } + // CHECK-LABEL: @xvftintrnel_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrnel_l_s(v8f32 _1) { return __lasx_xvftintrnel_l_s(_1); } + // CHECK-LABEL: @xvfrintrne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> +-// CHECK-NEXT: ret <8 x i32> [[TMP1]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> 
[[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfrintrne_s(v8f32 _1) { return __lasx_xvfrintrne_s(_1); } + // CHECK-LABEL: @xvfrintrne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> +-// CHECK-NEXT: ret <4 x i64> [[TMP1]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfrintrne_d(v4f64 _1) { return __lasx_xvfrintrne_d(_1); } + // CHECK-LABEL: @xvfrintrz_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> +-// CHECK-NEXT: ret <8 x i32> [[TMP1]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfrintrz_s(v8f32 _1) { return __lasx_xvfrintrz_s(_1); } + // CHECK-LABEL: @xvfrintrz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> +-// CHECK-NEXT: ret <4 x i64> [[TMP1]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfrintrz_d(v4f64 _1) { return __lasx_xvfrintrz_d(_1); } + // CHECK-LABEL: @xvfrintrp_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> +-// CHECK-NEXT: ret <8 x i32> [[TMP1]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfrintrp_s(v8f32 _1) { return __lasx_xvfrintrp_s(_1); } + // CHECK-LABEL: @xvfrintrp_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> +-// CHECK-NEXT: ret <4 x i64> [[TMP1]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfrintrp_d(v4f64 _1) { 
return __lasx_xvfrintrp_d(_1); } + // CHECK-LABEL: @xvfrintrm_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> +-// CHECK-NEXT: ret <8 x i32> [[TMP1]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfrintrm_s(v8f32 _1) { return __lasx_xvfrintrm_s(_1); } + // CHECK-LABEL: @xvfrintrm_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> +-// CHECK-NEXT: ret <4 x i64> [[TMP1]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfrintrm_d(v4f64 _1) { return __lasx_xvfrintrm_d(_1); } + // CHECK-LABEL: @xvld( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvld(void * _1) { return __lasx_xvld(_1, 1); } + // CHECK-LABEL: @xvst( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1) ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1) + // CHECK-NEXT: ret void + // + void xvst(v32i8 _1, void * _2) { return __lasx_xvst(_1, _2, 1); } + // CHECK-LABEL: @xvstelm_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1, i32 1) ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1, i32 1) + // CHECK-NEXT: ret void + // + void xvstelm_b(v32i8 _1, void * _2) { return __lasx_xvstelm_b(_1, _2, 1, 1); } + // CHECK-LABEL: @xvstelm_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1:%.*]], ptr [[_2:%.*]], i32 2, i32 1) ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1]], ptr [[_2:%.*]], i32 2, i32 1) + // CHECK-NEXT: ret void + // + void xvstelm_h(v16i16 _1, void * _2) { return __lasx_xvstelm_h(_1, _2, 2, 1); } + // CHECK-LABEL: @xvstelm_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1:%.*]], ptr [[_2:%.*]], i32 4, i32 1) ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1]], ptr [[_2:%.*]], i32 4, i32 1) + // CHECK-NEXT: ret void 
+ // + void xvstelm_w(v8i32 _1, void * _2) { return __lasx_xvstelm_w(_1, _2, 4, 1); } + // CHECK-LABEL: @xvstelm_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1:%.*]], ptr [[_2:%.*]], i32 8, i32 1) ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1]], ptr [[_2:%.*]], i32 8, i32 1) + // CHECK-NEXT: ret void + // + void xvstelm_d(v4i64 _1, void * _2) { return __lasx_xvstelm_d(_1, _2, 8, 1); } + // CHECK-LABEL: @xvinsve0_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvinsve0_w(v8i32 _1, v8i32 _2) { return __lasx_xvinsve0_w(_1, _2, 1); } + // CHECK-LABEL: @xvinsve0_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvinsve0_d(v4i64 _1, v4i64 _2) { return __lasx_xvinsve0_d(_1, _2, 1); } + // CHECK-LABEL: @xvpickve_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvpickve_w(v8i32 _1) { return __lasx_xvpickve_w(_1, 1); } + // CHECK-LABEL: @xvpickve_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvpickve_d(v4i64 _1) { return __lasx_xvpickve_d(_1, 1); } + // CHECK-LABEL: @xvssrlrn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, 
!tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssrlrn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrlrn_b_h(_1, _2); } + // CHECK-LABEL: @xvssrlrn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssrlrn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrlrn_h_w(_1, _2); } + // CHECK-LABEL: @xvssrlrn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssrlrn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrlrn_w_d(_1, _2); } + // CHECK-LABEL: @xvssrln_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssrln_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrln_b_h(_1, _2); } + // CHECK-LABEL: @xvssrln_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssrln_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrln_h_w(_1, _2); } + // CHECK-LABEL: @xvssrln_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssrln_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrln_w_d(_1, _2); } + // CHECK-LABEL: @xvorn_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvorn_v(v32i8 _1, v32i8 _2) { return __lasx_xvorn_v(_1, _2); } + // CHECK-LABEL: @xvldi( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvldi() { return __lasx_xvldi(1); } + // CHECK-LABEL: @xvldx( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1:%.*]], i64 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1:%.*]], i64 1), !noalias [[META5:![0-9]+]] ++// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvldx(void * _1) { return __lasx_xvldx(_1, 1); } + // CHECK-LABEL: @xvstx( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i64 1) ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_112]], ptr [[_2:%.*]], i64 1) + // CHECK-NEXT: ret void + // + void xvstx(v32i8 _1, void * _2) { return __lasx_xvstx(_1, _2, 1); } + // CHECK-LABEL: @xvextl_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvextl_qu_du(v4u64 _1) { return __lasx_xvextl_qu_du(_1); } + // CHECK-LABEL: @xvinsgr2vr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> [[_1:%.*]], i32 1, i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: 
[[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> [[_1]], i32 1, i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvinsgr2vr_w(v8i32 _1) { return __lasx_xvinsgr2vr_w(_1, 1, 1); } + // CHECK-LABEL: @xvinsgr2vr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> [[_1:%.*]], i64 1, i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> [[_1]], i64 1, i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvinsgr2vr_d(v4i64 _1) { return __lasx_xvinsgr2vr_d(_1, 1, 1); } + // CHECK-LABEL: @xvreplve0_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> [[_112]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvreplve0_b(v32i8 _1) { return __lasx_xvreplve0_b(_1); } + // CHECK-LABEL: @xvreplve0_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> [[_112]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvreplve0_h(v16i16 _1) { return __lasx_xvreplve0_h(_1); } + // CHECK-LABEL: @xvreplve0_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> [[_112]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvreplve0_w(v8i32 _1) { return __lasx_xvreplve0_w(_1); } + // CHECK-LABEL: @xvreplve0_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvreplve0_d(v4i64 _1) { return __lasx_xvreplve0_d(_1); } + // CHECK-LABEL: @xvreplve0_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], 
align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> [[_112]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvreplve0_q(v32i8 _1) { return __lasx_xvreplve0_q(_1); } + // CHECK-LABEL: @vext2xv_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> [[_112]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 vext2xv_h_b(v32i8 _1) { return __lasx_vext2xv_h_b(_1); } + // CHECK-LABEL: @vext2xv_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> [[_112]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 vext2xv_w_h(v16i16 _1) { return __lasx_vext2xv_w_h(_1); } + // CHECK-LABEL: @vext2xv_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> [[_112]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 vext2xv_d_w(v8i32 _1) { return __lasx_vext2xv_d_w(_1); } + // CHECK-LABEL: @vext2xv_w_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> [[_112]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 vext2xv_w_b(v32i8 _1) { return __lasx_vext2xv_w_b(_1); } + // CHECK-LABEL: @vext2xv_d_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> [[_112]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 vext2xv_d_h(v16i16 _1) { return __lasx_vext2xv_d_h(_1); } + // CHECK-LABEL: @vext2xv_d_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 
32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> [[_112]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 vext2xv_d_b(v32i8 _1) { return __lasx_vext2xv_d_b(_1); } + // CHECK-LABEL: @vext2xv_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_112]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 vext2xv_hu_bu(v32i8 _1) { return __lasx_vext2xv_hu_bu(_1); } + // CHECK-LABEL: @vext2xv_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> [[_112]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 vext2xv_wu_hu(v16i16 _1) { return __lasx_vext2xv_wu_hu(_1); } + // CHECK-LABEL: @vext2xv_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> [[_112]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 vext2xv_du_wu(v8i32 _1) { return __lasx_vext2xv_du_wu(_1); } + // CHECK-LABEL: @vext2xv_wu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> [[_112]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 vext2xv_wu_bu(v32i8 _1) { return __lasx_vext2xv_wu_bu(_1); } + // CHECK-LABEL: @vext2xv_du_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> [[_112]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 vext2xv_du_hu(v16i16 _1) { return __lasx_vext2xv_du_hu(_1); } + // CHECK-LABEL: @vext2xv_du_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: 
[[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> [[_112]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 vext2xv_du_bu(v32i8 _1) { return __lasx_vext2xv_du_bu(_1); } + // CHECK-LABEL: @xvpermi_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvpermi_q(v32i8 _1, v32i8 _2) { return __lasx_xvpermi_q(_1, _2, 1); } + // CHECK-LABEL: @xvpermi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvpermi_d(v4i64 _1) { return __lasx_xvpermi_d(_1, 1); } + // CHECK-LABEL: @xvperm_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvperm_w(v8i32 _1, v8i32 _2) { return __lasx_xvperm_w(_1, _2); } + // CHECK-LABEL: @xvldrepl_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvldrepl_b(void * _1) { return __lasx_xvldrepl_b(_1, 1); } + // CHECK-LABEL: @xvldrepl_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr [[_1:%.*]], i32 2) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvldrepl_h(void * _1) { return __lasx_xvldrepl_h(_1, 2); } + // CHECK-LABEL: @xvldrepl_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr [[_1:%.*]], i32 4) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 
xvldrepl_w(void * _1) { return __lasx_xvldrepl_w(_1, 4); } + // CHECK-LABEL: @xvldrepl_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr [[_1:%.*]], i32 8) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvldrepl_d(void * _1) { return __lasx_xvldrepl_d(_1, 8); } + // CHECK-LABEL: @xvpickve2gr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xvpickve2gr_w(v8i32 _1) { return __lasx_xvpickve2gr_w(_1, 1); } + // CHECK-LABEL: @xvpickve2gr_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + unsigned int xvpickve2gr_wu(v8i32 _1) { return __lasx_xvpickve2gr_wu(_1, 1); } + // CHECK-LABEL: @xvpickve2gr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i64 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: ret i64 [[TMP1]] + // + long xvpickve2gr_d(v4i64 _1) { return __lasx_xvpickve2gr_d(_1, 1); } + // CHECK-LABEL: @xvpickve2gr_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i64 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: ret i64 [[TMP1]] + // + unsigned long int xvpickve2gr_du(v4i64 _1) { return __lasx_xvpickve2gr_du(_1, 1); } + // CHECK-LABEL: @xvaddwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvaddwev_q_d(_1, _2); } + // CHECK-LABEL: @xvaddwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, 
!tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvaddwev_d_w(_1, _2); } + // CHECK-LABEL: @xvaddwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvaddwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvaddwev_w_h(_1, _2); } + // CHECK-LABEL: @xvaddwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvaddwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvaddwev_h_b(_1, _2); } + // CHECK-LABEL: @xvaddwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvaddwev_q_du(_1, _2); } + // CHECK-LABEL: @xvaddwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvaddwev_d_wu(_1, _2); } + // CHECK-LABEL: @xvaddwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvaddwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvaddwev_w_hu(_1, _2); } + // CHECK-LABEL: @xvaddwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvaddwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvaddwev_h_bu(_1, _2); } + // CHECK-LABEL: @xvsubwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvsubwev_q_d(_1, _2); } + // CHECK-LABEL: @xvsubwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvsubwev_d_w(_1, _2); } + // CHECK-LABEL: @xvsubwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 
xvsubwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvsubwev_w_h(_1, _2); } + // CHECK-LABEL: @xvsubwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsubwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvsubwev_h_b(_1, _2); } + // CHECK-LABEL: @xvsubwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvsubwev_q_du(_1, _2); } + // CHECK-LABEL: @xvsubwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsubwev_d_wu(_1, _2); } + // CHECK-LABEL: @xvsubwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsubwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsubwev_w_hu(_1, _2); } + // CHECK-LABEL: @xvsubwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> 
@llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsubwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsubwev_h_bu(_1, _2); } + // CHECK-LABEL: @xvmulwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvmulwev_q_d(_1, _2); } + // CHECK-LABEL: @xvmulwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvmulwev_d_w(_1, _2); } + // CHECK-LABEL: @xvmulwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmulwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvmulwev_w_h(_1, _2); } + // CHECK-LABEL: @xvmulwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmulwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvmulwev_h_b(_1, _2); } + // CHECK-LABEL: @xvmulwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], 
align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvmulwev_q_du(_1, _2); } + // CHECK-LABEL: @xvmulwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmulwev_d_wu(_1, _2); } + // CHECK-LABEL: @xvmulwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmulwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmulwev_w_hu(_1, _2); } + // CHECK-LABEL: @xvmulwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmulwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmulwev_h_bu(_1, _2); } + // CHECK-LABEL: @xvaddwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvaddwod_q_d(_1, _2); } + // CHECK-LABEL: @xvaddwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvaddwod_d_w(_1, _2); } + // CHECK-LABEL: @xvaddwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvaddwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvaddwod_w_h(_1, _2); } + // CHECK-LABEL: @xvaddwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvaddwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvaddwod_h_b(_1, _2); } + // CHECK-LABEL: @xvaddwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvaddwod_q_du(_1, _2); } + // CHECK-LABEL: @xvaddwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 
xvaddwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvaddwod_d_wu(_1, _2); } + // CHECK-LABEL: @xvaddwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvaddwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvaddwod_w_hu(_1, _2); } + // CHECK-LABEL: @xvaddwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvaddwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvaddwod_h_bu(_1, _2); } + // CHECK-LABEL: @xvsubwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvsubwod_q_d(_1, _2); } + // CHECK-LABEL: @xvsubwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvsubwod_d_w(_1, _2); } + // CHECK-LABEL: @xvsubwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsubwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvsubwod_w_h(_1, _2); } + // CHECK-LABEL: @xvsubwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsubwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvsubwod_h_b(_1, _2); } + // CHECK-LABEL: @xvsubwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvsubwod_q_du(_1, _2); } + // CHECK-LABEL: @xvsubwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsubwod_d_wu(_1, _2); } + // CHECK-LABEL: @xvsubwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsubwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsubwod_w_hu(_1, _2); } + // CHECK-LABEL: @xvsubwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsubwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsubwod_h_bu(_1, _2); } + // CHECK-LABEL: @xvmulwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvmulwod_q_d(_1, _2); } + // CHECK-LABEL: @xvmulwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvmulwod_d_w(_1, _2); } + // CHECK-LABEL: @xvmulwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmulwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvmulwod_w_h(_1, _2); } + // CHECK-LABEL: @xvmulwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmulwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvmulwod_h_b(_1, _2); } + // CHECK-LABEL: @xvmulwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvmulwod_q_du(_1, _2); } + // CHECK-LABEL: @xvmulwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmulwod_d_wu(_1, _2); } + // CHECK-LABEL: @xvmulwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmulwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmulwod_w_hu(_1, _2); } + // CHECK-LABEL: @xvmulwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmulwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmulwod_h_bu(_1, _2); } + // CHECK-LABEL: @xvaddwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + 
v4i64 xvaddwev_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvaddwev_d_wu_w(_1, _2); } + // CHECK-LABEL: @xvaddwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvaddwev_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvaddwev_w_hu_h(_1, _2); } + // CHECK-LABEL: @xvaddwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvaddwev_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvaddwev_h_bu_b(_1, _2); } + // CHECK-LABEL: @xvmulwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvmulwev_d_wu_w(_1, _2); } + // CHECK-LABEL: @xvmulwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmulwev_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvmulwev_w_hu_h(_1, _2); } + // CHECK-LABEL: @xvmulwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] 
++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmulwev_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvmulwev_h_bu_b(_1, _2); } + // CHECK-LABEL: @xvaddwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvaddwod_d_wu_w(_1, _2); } + // CHECK-LABEL: @xvaddwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvaddwod_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvaddwod_w_hu_h(_1, _2); } + // CHECK-LABEL: @xvaddwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvaddwod_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvaddwod_h_bu_b(_1, _2); } + // CHECK-LABEL: @xvmulwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvmulwod_d_wu_w(_1, _2); } + // CHECK-LABEL: @xvmulwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> 
[[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmulwod_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvmulwod_w_hu_h(_1, _2); } + // CHECK-LABEL: @xvmulwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmulwod_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvmulwod_h_bu_b(_1, _2); } + // CHECK-LABEL: @xvhaddw_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvhaddw_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvhaddw_q_d(_1, _2); } + // CHECK-LABEL: @xvhaddw_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvhaddw_qu_du(v4u64 _1, v4u64 _2) { return __lasx_xvhaddw_qu_du(_1, _2); } + // CHECK-LABEL: @xvhsubw_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvhsubw_q_d(v4i64 _1, v4i64 _2) { return 
__lasx_xvhsubw_q_d(_1, _2); } + // CHECK-LABEL: @xvhsubw_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvhsubw_qu_du(v4u64 _1, v4u64 _2) { return __lasx_xvhsubw_qu_du(_1, _2); } + // CHECK-LABEL: @xvmaddwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwev_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmaddwev_q_d(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwev_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmaddwev_d_w(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmaddwev_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmaddwev_w_h(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x 
i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmaddwev_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmaddwev_h_b(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmaddwev_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __lasx_xvmaddwev_q_du(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmaddwev_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __lasx_xvmaddwev_d_wu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvmaddwev_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __lasx_xvmaddwev_w_hu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_h_bu( + // CHECK-NEXT: entry: +-// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvmaddwev_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __lasx_xvmaddwev_h_bu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwod_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmaddwod_q_d(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwod_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmaddwod_d_w(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmaddwod_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmaddwod_w_h(_1, _2, _3); } + // CHECK-LABEL: 
@xvmaddwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmaddwod_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmaddwod_h_b(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmaddwod_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __lasx_xvmaddwod_q_du(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmaddwod_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __lasx_xvmaddwod_d_wu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvmaddwod_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return 
__lasx_xvmaddwod_w_hu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvmaddwod_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __lasx_xvmaddwod_h_bu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwev_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __lasx_xvmaddwev_q_du_d(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwev_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __lasx_xvmaddwev_d_wu_w(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret 
void + // + v8i32 xvmaddwev_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __lasx_xvmaddwev_w_hu_h(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmaddwev_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __lasx_xvmaddwev_h_bu_b(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwod_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __lasx_xvmaddwod_q_du_d(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwod_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __lasx_xvmaddwod_d_wu_w(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) ++// CHECK-NEXT: store <8 x 
i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmaddwod_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __lasx_xvmaddwod_w_hu_h(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmaddwod_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __lasx_xvmaddwod_h_bu_b(_1, _2, _3); } + // CHECK-LABEL: @xvrotr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvrotr_b(v32i8 _1, v32i8 _2) { return __lasx_xvrotr_b(_1, _2); } + // CHECK-LABEL: @xvrotr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvrotr_h(v16i16 _1, v16i16 _2) { return __lasx_xvrotr_h(_1, _2); } + // CHECK-LABEL: @xvrotr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvrotr_w(v8i32 _1, v8i32 _2) { return __lasx_xvrotr_w(_1, _2); } + // CHECK-LABEL: @xvrotr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load 
<4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvrotr_d(v4i64 _1, v4i64 _2) { return __lasx_xvrotr_d(_1, _2); } + // CHECK-LABEL: @xvadd_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvadd_q(v4i64 _1, v4i64 _2) { return __lasx_xvadd_q(_1, _2); } + // CHECK-LABEL: @xvsub_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsub_q(v4i64 _1, v4i64 _2) { return __lasx_xvsub_q(_1, _2); } + // CHECK-LABEL: @xvaddwev_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwev_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvaddwev_q_du_d(_1, _2); } + // CHECK-LABEL: @xvaddwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvaddwod_q_du_d(_1, _2); } + // CHECK-LABEL: @xvmulwev_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x 
i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvmulwev_q_du_d(_1, _2); } + // CHECK-LABEL: @xvmulwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvmulwod_q_du_d(_1, _2); } + // CHECK-LABEL: @xvmskgez_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> [[_112]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmskgez_b(v32i8 _1) { return __lasx_xvmskgez_b(_1); } + // CHECK-LABEL: @xvmsknz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> [[_112]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmsknz_b(v32i8 _1) { return __lasx_xvmsknz_b(_1); } + // CHECK-LABEL: @xvexth_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> [[_112]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvexth_h_b(v32i8 _1) { return __lasx_xvexth_h_b(_1); } + // CHECK-LABEL: @xvexth_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> [[_112]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, 
!tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvexth_w_h(v16i16 _1) { return __lasx_xvexth_w_h(_1); } + // CHECK-LABEL: @xvexth_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> [[_112]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvexth_d_w(v8i32 _1) { return __lasx_xvexth_d_w(_1); } + // CHECK-LABEL: @xvexth_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvexth_q_d(v4i64 _1) { return __lasx_xvexth_q_d(_1); } + // CHECK-LABEL: @xvexth_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> [[_112]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvexth_hu_bu(v32u8 _1) { return __lasx_xvexth_hu_bu(_1); } + // CHECK-LABEL: @xvexth_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> [[_112]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvexth_wu_hu(v16u16 _1) { return __lasx_xvexth_wu_hu(_1); } + // CHECK-LABEL: @xvexth_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> [[_112]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvexth_du_wu(v8u32 _1) { return __lasx_xvexth_du_wu(_1); } + // CHECK-LABEL: @xvexth_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa 
[[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvexth_qu_du(v4u64 _1) { return __lasx_xvexth_qu_du(_1); } + // CHECK-LABEL: @xvrotri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvrotri_b(v32i8 _1) { return __lasx_xvrotri_b(_1, 1); } + // CHECK-LABEL: @xvrotri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvrotri_h(v16i16 _1) { return __lasx_xvrotri_h(_1, 1); } + // CHECK-LABEL: @xvrotri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvrotri_w(v8i32 _1) { return __lasx_xvrotri_w(_1, 1); } + // CHECK-LABEL: @xvrotri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvrotri_d(v4i64 _1) { return __lasx_xvrotri_d(_1, 1); } + // CHECK-LABEL: @xvextl_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvextl_q_d(v4i64 _1) { return __lasx_xvextl_q_d(_1); } + // CHECK-LABEL: @xvsrlni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> 
@llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrlni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvsrlni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrlni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvsrlni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrlni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvsrlni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrlni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvsrlrni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrlrni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvsrlrni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = 
load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrlrni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvsrlrni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrlrni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvsrlrni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrlrni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrlni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrlni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_w_d( + // 
CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrlni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrlni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrlni_bu_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrlni_hu_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: 
store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrlni_wu_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrlni_du_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrlrni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrlrni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrlrni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa 
[[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrlrni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrlrni_bu_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrlrni_hu_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrlrni_wu_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrlrni_du_q(_1, _2, 1); } + // CHECK-LABEL: @xvsrani_b_h( + // CHECK-NEXT: entry: 
+-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrani_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvsrani_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrani_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvsrani_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrani_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvsrani_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrani_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvsrarni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], 
align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrarni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvsrarni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrarni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvsrarni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrarni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvsrarni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrarni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrani_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], 
align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrani_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrani_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrani_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrani_bu_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrani_hu_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> 
[[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrani_wu_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrani_du_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrarni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrarni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + 
v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrarni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrarni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrarni_bu_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrarni_hu_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrarni_wu_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] 
++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrarni_du_q(_1, _2, 1); } + // CHECK-LABEL: @xbnz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbnz_b(v32u8 _1) { return __lasx_xbnz_b(_1); } + // CHECK-LABEL: @xbnz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbnz_d(v4u64 _1) { return __lasx_xbnz_d(_1); } + // CHECK-LABEL: @xbnz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbnz_h(v16u16 _1) { return __lasx_xbnz_h(_1); } + // CHECK-LABEL: @xbnz_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbnz_v(v32u8 _1) { return __lasx_xbnz_v(_1); } + // CHECK-LABEL: @xbnz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbnz_w(v8u32 _1) { return __lasx_xbnz_w(_1); } + // CHECK-LABEL: @xbz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbz_b(v32u8 _1) { return __lasx_xbz_b(_1); } + // CHECK-LABEL: @xbz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbz_d(v4u64 _1) { return __lasx_xbz_d(_1); } + // CHECK-LABEL: @xbz_h( + // 
CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbz_h(v16u16 _1) { return __lasx_xbz_h(_1); } + // CHECK-LABEL: @xbz_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbz_v(v32u8 _1) { return __lasx_xbz_v(_1); } + // CHECK-LABEL: @xbz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbz_w(v8u32 _1) { return __lasx_xbz_w(_1); } + // CHECK-LABEL: @xvfcmp_caf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_caf_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_caf_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_caf_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_caf_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_caf_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_ceq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_ceq_d(v4f64 _1, v4f64 _2) { return 
__lasx_xvfcmp_ceq_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_ceq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_ceq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_ceq_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cle_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cle_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cle_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cle_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cle_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_clt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_clt_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_clt_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_clt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> [[_1]], <8 x 
float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_clt_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_clt_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cne_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cne_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cne_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cne_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cor_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cor_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cor_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cor_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cor_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cor_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cueq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// 
CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cueq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cueq_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cueq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cueq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cueq_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cule_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cule_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cule_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cule_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cule_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cule_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cult_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cult_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cult_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cult_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cult_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cult_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cun_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cun_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cun_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cune_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cune_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cune_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cune_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cune_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cune_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cun_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: 
ret void + // + v8i32 xvfcmp_cun_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cun_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_saf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_saf_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_saf_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_saf_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_saf_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_saf_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_seq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_seq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_seq_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_seq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_seq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_seq_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 
x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sle_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sle_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sle_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sle_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sle_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_slt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_slt_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_slt_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_slt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_slt_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_slt_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sne_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sne_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x 
float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sne_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sne_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sor_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sor_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sor_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sor_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sor_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sor_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sueq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sueq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sueq_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sueq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sueq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sueq_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sule_d( + // CHECK-NEXT: entry: +-// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sule_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sule_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sule_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sule_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sule_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sult_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sult_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sult_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sult_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sult_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sult_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sun_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr 
[[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sun_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sun_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sune_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sune_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sune_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sune_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sune_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sune_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sun_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sun_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sun_s(_1, _2); } + // CHECK-LABEL: @xvpickve_d_f( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvpickve_d_f(v4f64 _1) { return __lasx_xvpickve_d_f(_1, 1); } + // CHECK-LABEL: @xvpickve_w_f( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa 
[[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvpickve_w_f(v8f32 _1) { return __lasx_xvpickve_w_f(_1, 1); } + // CHECK-LABEL: @xvrepli_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvrepli_b() { return __lasx_xvrepli_b(1); } + // CHECK-LABEL: @xvrepli_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvrepli_d() { return __lasx_xvrepli_d(1); } + // CHECK-LABEL: @xvrepli_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvrepli_h() { return __lasx_xvrepli_h(1); } + // CHECK-LABEL: @xvrepli_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvrepli_w() { return __lasx_xvrepli_w(1); } +diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin.c b/clang/test/CodeGen/LoongArch/lasx/builtin.c +index 0185f2004d52..f52a23a5faea 100644 +--- a/clang/test/CodeGen/LoongArch/lasx/builtin.c ++++ b/clang/test/CodeGen/LoongArch/lasx/builtin.c +@@ -27,4426 +27,6382 @@ typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); + + // CHECK-LABEL: @xvsll_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsll_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsll_b(_1, _2); } + // CHECK-LABEL: @xvsll_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsll_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsll_h(_1, _2); } + // CHECK-LABEL: @xvsll_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 
x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsll_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsll_w(_1, _2); } + // CHECK-LABEL: @xvsll_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsll_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsll_d(_1, _2); } + // CHECK-LABEL: @xvslli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvslli_b(v32i8 _1) { return __builtin_lasx_xvslli_b(_1, 1); } + // CHECK-LABEL: @xvslli_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvslli_h(v16i16 _1) { return __builtin_lasx_xvslli_h(_1, 1); } + // CHECK-LABEL: @xvslli_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvslli_w(v8i32 _1) { return __builtin_lasx_xvslli_w(_1, 1); } + // CHECK-LABEL: @xvslli_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + 
v4i64 xvslli_d(v4i64 _1) { return __builtin_lasx_xvslli_d(_1, 1); } + // CHECK-LABEL: @xvsra_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsra_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsra_b(_1, _2); } + // CHECK-LABEL: @xvsra_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsra_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsra_h(_1, _2); } + // CHECK-LABEL: @xvsra_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsra_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsra_w(_1, _2); } + // CHECK-LABEL: @xvsra_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsra_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsra_d(_1, _2); } + // CHECK-LABEL: @xvsrai_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrai_b(v32i8 _1) { return __builtin_lasx_xvsrai_b(_1, 1); } + // 
CHECK-LABEL: @xvsrai_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrai_h(v16i16 _1) { return __builtin_lasx_xvsrai_h(_1, 1); } + // CHECK-LABEL: @xvsrai_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrai_w(v8i32 _1) { return __builtin_lasx_xvsrai_w(_1, 1); } + // CHECK-LABEL: @xvsrai_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrai_d(v4i64 _1) { return __builtin_lasx_xvsrai_d(_1, 1); } + // CHECK-LABEL: @xvsrar_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrar_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrar_b(_1, _2); } + // CHECK-LABEL: @xvsrar_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrar_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrar_h(_1, _2); } + // CHECK-LABEL: @xvsrar_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] 
++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrar_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrar_w(_1, _2); } + // CHECK-LABEL: @xvsrar_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrar_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrar_d(_1, _2); } + // CHECK-LABEL: @xvsrari_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrari_b(v32i8 _1) { return __builtin_lasx_xvsrari_b(_1, 1); } + // CHECK-LABEL: @xvsrari_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrari_h(v16i16 _1) { return __builtin_lasx_xvsrari_h(_1, 1); } + // CHECK-LABEL: @xvsrari_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrari_w(v8i32 _1) { return __builtin_lasx_xvsrari_w(_1, 1); } + // CHECK-LABEL: @xvsrari_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrari_d(v4i64 _1) { return __builtin_lasx_xvsrari_d(_1, 1); } + // 
CHECK-LABEL: @xvsrl_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrl_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrl_b(_1, _2); } + // CHECK-LABEL: @xvsrl_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrl_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrl_h(_1, _2); } + // CHECK-LABEL: @xvsrl_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrl_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrl_w(_1, _2); } + // CHECK-LABEL: @xvsrl_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrl_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrl_d(_1, _2); } + // CHECK-LABEL: @xvsrli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrli_b(v32i8 _1) { return __builtin_lasx_xvsrli_b(_1, 1); } + // CHECK-LABEL: @xvsrli_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] 
= tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrli_h(v16i16 _1) { return __builtin_lasx_xvsrli_h(_1, 1); } + // CHECK-LABEL: @xvsrli_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrli_w(v8i32 _1) { return __builtin_lasx_xvsrli_w(_1, 1); } + // CHECK-LABEL: @xvsrli_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrli_d(v4i64 _1) { return __builtin_lasx_xvsrli_d(_1, 1); } + // CHECK-LABEL: @xvsrlr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrlr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlr_b(_1, _2); } + // CHECK-LABEL: @xvsrlr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrlr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlr_h(_1, _2); } + // CHECK-LABEL: @xvsrlr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, 
!tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrlr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlr_w(_1, _2); } + // CHECK-LABEL: @xvsrlr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrlr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlr_d(_1, _2); } + // CHECK-LABEL: @xvsrlri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrlri_b(v32i8 _1) { return __builtin_lasx_xvsrlri_b(_1, 1); } + // CHECK-LABEL: @xvsrlri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrlri_h(v16i16 _1) { return __builtin_lasx_xvsrlri_h(_1, 1); } + // CHECK-LABEL: @xvsrlri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrlri_w(v8i32 _1) { return __builtin_lasx_xvsrlri_w(_1, 1); } + // CHECK-LABEL: @xvsrlri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrlri_d(v4i64 _1) { return __builtin_lasx_xvsrlri_d(_1, 1); } + // CHECK-LABEL: @xvbitclr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] 
= tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvbitclr_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitclr_b(_1, _2); } + // CHECK-LABEL: @xvbitclr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvbitclr_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitclr_h(_1, _2); } + // CHECK-LABEL: @xvbitclr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvbitclr_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitclr_w(_1, _2); } + // CHECK-LABEL: @xvbitclr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvbitclr_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitclr_d(_1, _2); } + // CHECK-LABEL: @xvbitclri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvbitclri_b(v32u8 _1) { return __builtin_lasx_xvbitclri_b(_1, 1); } + // CHECK-LABEL: @xvbitclri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = 
tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvbitclri_h(v16u16 _1) { return __builtin_lasx_xvbitclri_h(_1, 1); } + // CHECK-LABEL: @xvbitclri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvbitclri_w(v8u32 _1) { return __builtin_lasx_xvbitclri_w(_1, 1); } + // CHECK-LABEL: @xvbitclri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvbitclri_d(v4u64 _1) { return __builtin_lasx_xvbitclri_d(_1, 1); } + // CHECK-LABEL: @xvbitset_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvbitset_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitset_b(_1, _2); } + // CHECK-LABEL: @xvbitset_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvbitset_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitset_h(_1, _2); } + // CHECK-LABEL: @xvbitset_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// 
CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvbitset_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitset_w(_1, _2); } + // CHECK-LABEL: @xvbitset_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvbitset_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitset_d(_1, _2); } + // CHECK-LABEL: @xvbitseti_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvbitseti_b(v32u8 _1) { return __builtin_lasx_xvbitseti_b(_1, 1); } + // CHECK-LABEL: @xvbitseti_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvbitseti_h(v16u16 _1) { return __builtin_lasx_xvbitseti_h(_1, 1); } + // CHECK-LABEL: @xvbitseti_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvbitseti_w(v8u32 _1) { return __builtin_lasx_xvbitseti_w(_1, 1); } + // CHECK-LABEL: @xvbitseti_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvbitseti_d(v4u64 _1) { 
return __builtin_lasx_xvbitseti_d(_1, 1); } + // CHECK-LABEL: @xvbitrev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvbitrev_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitrev_b(_1, _2); } + // CHECK-LABEL: @xvbitrev_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvbitrev_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitrev_h(_1, _2); } + // CHECK-LABEL: @xvbitrev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvbitrev_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitrev_w(_1, _2); } + // CHECK-LABEL: @xvbitrev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvbitrev_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitrev_d(_1, _2); } + // CHECK-LABEL: @xvbitrevi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvbitrevi_b(v32u8 _1) { 
return __builtin_lasx_xvbitrevi_b(_1, 1); } + // CHECK-LABEL: @xvbitrevi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvbitrevi_h(v16u16 _1) { return __builtin_lasx_xvbitrevi_h(_1, 1); } + // CHECK-LABEL: @xvbitrevi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvbitrevi_w(v8u32 _1) { return __builtin_lasx_xvbitrevi_w(_1, 1); } + // CHECK-LABEL: @xvbitrevi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvbitrevi_d(v4u64 _1) { return __builtin_lasx_xvbitrevi_d(_1, 1); } + // CHECK-LABEL: @xvadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvadd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvadd_b(_1, _2); } + // CHECK-LABEL: @xvadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvadd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvadd_h(_1, _2); } + // CHECK-LABEL: @xvadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// 
CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvadd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvadd_w(_1, _2); } + // CHECK-LABEL: @xvadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvadd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadd_d(_1, _2); } + // CHECK-LABEL: @xvaddi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvaddi_bu(v32i8 _1) { return __builtin_lasx_xvaddi_bu(_1, 1); } + // CHECK-LABEL: @xvaddi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvaddi_hu(v16i16 _1) { return __builtin_lasx_xvaddi_hu(_1, 1); } + // CHECK-LABEL: @xvaddi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvaddi_wu(v8i32 _1) { return __builtin_lasx_xvaddi_wu(_1, 1); } + // CHECK-LABEL: @xvaddi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + 
v4i64 xvaddi_du(v4i64 _1) { return __builtin_lasx_xvaddi_du(_1, 1); } + // CHECK-LABEL: @xvsub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsub_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsub_b(_1, _2); } + // CHECK-LABEL: @xvsub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsub_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsub_h(_1, _2); } + // CHECK-LABEL: @xvsub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsub_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsub_w(_1, _2); } + // CHECK-LABEL: @xvsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsub_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsub_d(_1, _2); } + // CHECK-LABEL: @xvsubi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsubi_bu(v32i8 _1) { return __builtin_lasx_xvsubi_bu(_1, 1); } + 
// CHECK-LABEL: @xvsubi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsubi_hu(v16i16 _1) { return __builtin_lasx_xvsubi_hu(_1, 1); } + // CHECK-LABEL: @xvsubi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsubi_wu(v8i32 _1) { return __builtin_lasx_xvsubi_wu(_1, 1); } + // CHECK-LABEL: @xvsubi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubi_du(v4i64 _1) { return __builtin_lasx_xvsubi_du(_1, 1); } + // CHECK-LABEL: @xvmax_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmax_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmax_b(_1, _2); } + // CHECK-LABEL: @xvmax_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmax_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmax_h(_1, _2); } + // CHECK-LABEL: @xvmax_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa 
[[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmax_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmax_w(_1, _2); } + // CHECK-LABEL: @xvmax_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmax_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmax_d(_1, _2); } + // CHECK-LABEL: @xvmaxi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmaxi_b(v32i8 _1) { return __builtin_lasx_xvmaxi_b(_1, 1); } + // CHECK-LABEL: @xvmaxi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmaxi_h(v16i16 _1) { return __builtin_lasx_xvmaxi_h(_1, 1); } + // CHECK-LABEL: @xvmaxi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmaxi_w(v8i32 _1) { return __builtin_lasx_xvmaxi_w(_1, 1); } + // CHECK-LABEL: @xvmaxi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaxi_d(v4i64 _1) { return __builtin_lasx_xvmaxi_d(_1, 1); } + // CHECK-LABEL: @xvmax_bu( 
+ // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvmax_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmax_bu(_1, _2); } + // CHECK-LABEL: @xvmax_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvmax_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmax_hu(_1, _2); } + // CHECK-LABEL: @xvmax_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvmax_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmax_wu(_1, _2); } + // CHECK-LABEL: @xvmax_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmax_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmax_du(_1, _2); } + // CHECK-LABEL: @xvmaxi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvmaxi_bu(v32u8 _1) { return __builtin_lasx_xvmaxi_bu(_1, 1); } + // CHECK-LABEL: @xvmaxi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvmaxi_hu(v16u16 _1) { return __builtin_lasx_xvmaxi_hu(_1, 1); } + // CHECK-LABEL: @xvmaxi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvmaxi_wu(v8u32 _1) { return __builtin_lasx_xvmaxi_wu(_1, 1); } + // CHECK-LABEL: @xvmaxi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmaxi_du(v4u64 _1) { return __builtin_lasx_xvmaxi_du(_1, 1); } + // CHECK-LABEL: @xvmin_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmin_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmin_b(_1, _2); } + // CHECK-LABEL: @xvmin_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmin_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmin_h(_1, _2); } + // CHECK-LABEL: @xvmin_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr 
[[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmin_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmin_w(_1, _2); } + // CHECK-LABEL: @xvmin_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmin_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmin_d(_1, _2); } + // CHECK-LABEL: @xvmini_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmini_b(v32i8 _1) { return __builtin_lasx_xvmini_b(_1, 1); } + // CHECK-LABEL: @xvmini_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmini_h(v16i16 _1) { return __builtin_lasx_xvmini_h(_1, 1); } + // CHECK-LABEL: @xvmini_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmini_w(v8i32 _1) { return __builtin_lasx_xvmini_w(_1, 1); } + // CHECK-LABEL: @xvmini_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmini_d(v4i64 _1) { return __builtin_lasx_xvmini_d(_1, 1); } + // CHECK-LABEL: @xvmin_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = 
tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvmin_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmin_bu(_1, _2); } + // CHECK-LABEL: @xvmin_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvmin_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmin_hu(_1, _2); } + // CHECK-LABEL: @xvmin_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvmin_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmin_wu(_1, _2); } + // CHECK-LABEL: @xvmin_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmin_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmin_du(_1, _2); } + // CHECK-LABEL: @xvmini_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvmini_bu(v32u8 _1) { return __builtin_lasx_xvmini_bu(_1, 1); } + // CHECK-LABEL: @xvmini_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> 
@llvm.loongarch.lasx.xvmini.hu(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvmini_hu(v16u16 _1) { return __builtin_lasx_xvmini_hu(_1, 1); } + // CHECK-LABEL: @xvmini_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvmini_wu(v8u32 _1) { return __builtin_lasx_xvmini_wu(_1, 1); } + // CHECK-LABEL: @xvmini_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmini_du(v4u64 _1) { return __builtin_lasx_xvmini_du(_1, 1); } + // CHECK-LABEL: @xvseq_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvseq_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvseq_b(_1, _2); } + // CHECK-LABEL: @xvseq_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvseq_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvseq_h(_1, _2); } + // CHECK-LABEL: @xvseq_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// 
CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvseq_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvseq_w(_1, _2); } + // CHECK-LABEL: @xvseq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvseq_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvseq_d(_1, _2); } + // CHECK-LABEL: @xvseqi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvseqi_b(v32i8 _1) { return __builtin_lasx_xvseqi_b(_1, 1); } + // CHECK-LABEL: @xvseqi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvseqi_h(v16i16 _1) { return __builtin_lasx_xvseqi_h(_1, 1); } + // CHECK-LABEL: @xvseqi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvseqi_w(v8i32 _1) { return __builtin_lasx_xvseqi_w(_1, 1); } + // CHECK-LABEL: @xvseqi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvseqi_d(v4i64 _1) { return __builtin_lasx_xvseqi_d(_1, 1); } + // CHECK-LABEL: @xvslt_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> 
@llvm.loongarch.lasx.xvslt.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvslt_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvslt_b(_1, _2); } + // CHECK-LABEL: @xvslt_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvslt_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvslt_h(_1, _2); } + // CHECK-LABEL: @xvslt_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvslt_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvslt_w(_1, _2); } + // CHECK-LABEL: @xvslt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvslt_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvslt_d(_1, _2); } + // CHECK-LABEL: @xvslti_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvslti_b(v32i8 _1) { return __builtin_lasx_xvslti_b(_1, 1); } + // CHECK-LABEL: @xvslti_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: 
ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvslti_h(v16i16 _1) { return __builtin_lasx_xvslti_h(_1, 1); } + // CHECK-LABEL: @xvslti_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvslti_w(v8i32 _1) { return __builtin_lasx_xvslti_w(_1, 1); } + // CHECK-LABEL: @xvslti_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvslti_d(v4i64 _1) { return __builtin_lasx_xvslti_d(_1, 1); } + // CHECK-LABEL: @xvslt_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvslt_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvslt_bu(_1, _2); } + // CHECK-LABEL: @xvslt_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvslt_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvslt_hu(_1, _2); } + // CHECK-LABEL: @xvslt_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x 
i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvslt_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvslt_wu(_1, _2); } + // CHECK-LABEL: @xvslt_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvslt_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvslt_du(_1, _2); } + // CHECK-LABEL: @xvslti_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvslti_bu(v32u8 _1) { return __builtin_lasx_xvslti_bu(_1, 1); } + // CHECK-LABEL: @xvslti_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvslti_hu(v16u16 _1) { return __builtin_lasx_xvslti_hu(_1, 1); } + // CHECK-LABEL: @xvslti_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvslti_wu(v8u32 _1) { return __builtin_lasx_xvslti_wu(_1, 1); } + // CHECK-LABEL: @xvslti_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvslti_du(v4u64 _1) { return __builtin_lasx_xvslti_du(_1, 1); } + // CHECK-LABEL: @xvsle_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// 
CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsle_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsle_b(_1, _2); } + // CHECK-LABEL: @xvsle_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsle_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsle_h(_1, _2); } + // CHECK-LABEL: @xvsle_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsle_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsle_w(_1, _2); } + // CHECK-LABEL: @xvsle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsle_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsle_d(_1, _2); } + // CHECK-LABEL: @xvslei_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvslei_b(v32i8 _1) { return __builtin_lasx_xvslei_b(_1, 1); } + // CHECK-LABEL: @xvslei_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvslei_h(v16i16 _1) { return __builtin_lasx_xvslei_h(_1, 1); } + // CHECK-LABEL: @xvslei_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvslei_w(v8i32 _1) { return __builtin_lasx_xvslei_w(_1, 1); } + // CHECK-LABEL: @xvslei_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvslei_d(v4i64 _1) { return __builtin_lasx_xvslei_d(_1, 1); } + // CHECK-LABEL: @xvsle_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsle_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsle_bu(_1, _2); } + // CHECK-LABEL: @xvsle_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsle_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsle_hu(_1, _2); } + // CHECK-LABEL: @xvsle_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], 
ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsle_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsle_wu(_1, _2); } + // CHECK-LABEL: @xvsle_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsle_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsle_du(_1, _2); } + // CHECK-LABEL: @xvslei_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvslei_bu(v32u8 _1) { return __builtin_lasx_xvslei_bu(_1, 1); } + // CHECK-LABEL: @xvslei_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvslei_hu(v16u16 _1) { return __builtin_lasx_xvslei_hu(_1, 1); } + // CHECK-LABEL: @xvslei_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvslei_wu(v8u32 _1) { return __builtin_lasx_xvslei_wu(_1, 1); } + // CHECK-LABEL: @xvslei_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvslei_du(v4u64 _1) { return __builtin_lasx_xvslei_du(_1, 1); } + // CHECK-LABEL: @xvsat_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsat_b(v32i8 _1) { return __builtin_lasx_xvsat_b(_1, 1); } + // CHECK-LABEL: @xvsat_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsat_h(v16i16 _1) { return __builtin_lasx_xvsat_h(_1, 1); } + // CHECK-LABEL: @xvsat_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsat_w(v8i32 _1) { return __builtin_lasx_xvsat_w(_1, 1); } + // CHECK-LABEL: @xvsat_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsat_d(v4i64 _1) { return __builtin_lasx_xvsat_d(_1, 1); } + // CHECK-LABEL: @xvsat_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvsat_bu(v32u8 _1) { return __builtin_lasx_xvsat_bu(_1, 1); } + // CHECK-LABEL: @xvsat_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvsat_hu(v16u16 _1) { return __builtin_lasx_xvsat_hu(_1, 1); } + // CHECK-LABEL: @xvsat_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load 
<8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvsat_wu(v8u32 _1) { return __builtin_lasx_xvsat_wu(_1, 1); } + // CHECK-LABEL: @xvsat_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvsat_du(v4u64 _1) { return __builtin_lasx_xvsat_du(_1, 1); } + // CHECK-LABEL: @xvadda_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvadda_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvadda_b(_1, _2); } + // CHECK-LABEL: @xvadda_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvadda_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvadda_h(_1, _2); } + // CHECK-LABEL: @xvadda_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvadda_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvadda_w(_1, _2); } + // CHECK-LABEL: @xvadda_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: 
[[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvadda_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadda_d(_1, _2); } + // CHECK-LABEL: @xvsadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsadd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsadd_b(_1, _2); } + // CHECK-LABEL: @xvsadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsadd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsadd_h(_1, _2); } + // CHECK-LABEL: @xvsadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsadd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsadd_w(_1, _2); } + // CHECK-LABEL: @xvsadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsadd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsadd_d(_1, _2); } + // CHECK-LABEL: @xvsadd_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load 
<32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvsadd_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsadd_bu(_1, _2); } + // CHECK-LABEL: @xvsadd_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvsadd_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsadd_hu(_1, _2); } + // CHECK-LABEL: @xvsadd_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvsadd_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsadd_wu(_1, _2); } + // CHECK-LABEL: @xvsadd_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvsadd_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsadd_du(_1, _2); } + // CHECK-LABEL: @xvavg_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvavg_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvavg_b(_1, _2); } + // CHECK-LABEL: @xvavg_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x 
i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvavg_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvavg_h(_1, _2); } + // CHECK-LABEL: @xvavg_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvavg_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvavg_w(_1, _2); } + // CHECK-LABEL: @xvavg_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvavg_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvavg_d(_1, _2); } + // CHECK-LABEL: @xvavg_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvavg_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvavg_bu(_1, _2); } + // CHECK-LABEL: @xvavg_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvavg_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvavg_hu(_1, _2); } + // CHECK-LABEL: @xvavg_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: 
ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvavg_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvavg_wu(_1, _2); } + // CHECK-LABEL: @xvavg_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvavg_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvavg_du(_1, _2); } + // CHECK-LABEL: @xvavgr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvavgr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvavgr_b(_1, _2); } + // CHECK-LABEL: @xvavgr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvavgr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvavgr_h(_1, _2); } + // CHECK-LABEL: @xvavgr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvavgr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvavgr_w(_1, _2); } + // CHECK-LABEL: @xvavgr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvavgr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvavgr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvavgr_d(_1, _2); } + // CHECK-LABEL: @xvavgr_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvavgr_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvavgr_bu(_1, _2); } + // CHECK-LABEL: @xvavgr_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvavgr_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvavgr_hu(_1, _2); } + // CHECK-LABEL: @xvavgr_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvavgr_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvavgr_wu(_1, _2); } + // CHECK-LABEL: @xvavgr_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvavgr_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvavgr_du(_1, _2); } + // 
CHECK-LABEL: @xvssub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssub_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssub_b(_1, _2); } + // CHECK-LABEL: @xvssub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssub_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssub_h(_1, _2); } + // CHECK-LABEL: @xvssub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssub_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssub_w(_1, _2); } + // CHECK-LABEL: @xvssub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvssub_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssub_d(_1, _2); } + // CHECK-LABEL: @xvssub_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 
xvssub_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvssub_bu(_1, _2); } + // CHECK-LABEL: @xvssub_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvssub_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssub_hu(_1, _2); } + // CHECK-LABEL: @xvssub_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssub_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssub_wu(_1, _2); } + // CHECK-LABEL: @xvssub_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvssub_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssub_du(_1, _2); } + // CHECK-LABEL: @xvabsd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvabsd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvabsd_b(_1, _2); } + // CHECK-LABEL: @xvabsd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> 
[[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvabsd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvabsd_h(_1, _2); } + // CHECK-LABEL: @xvabsd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvabsd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvabsd_w(_1, _2); } + // CHECK-LABEL: @xvabsd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvabsd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvabsd_d(_1, _2); } + // CHECK-LABEL: @xvabsd_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvabsd_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvabsd_bu(_1, _2); } + // CHECK-LABEL: @xvabsd_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvabsd_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvabsd_hu(_1, _2); } + // CHECK-LABEL: @xvabsd_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvabsd.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvabsd_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvabsd_wu(_1, _2); } + // CHECK-LABEL: @xvabsd_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvabsd_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvabsd_du(_1, _2); } + // CHECK-LABEL: @xvmul_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmul_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmul_b(_1, _2); } + // CHECK-LABEL: @xvmul_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmul_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmul_h(_1, _2); } + // CHECK-LABEL: @xvmul_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmul_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmul_w(_1, _2); } + // CHECK-LABEL: @xvmul_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa 
[[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmul_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmul_d(_1, _2); } + // CHECK-LABEL: @xvmadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) ++// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmadd_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmadd_b(_1, _2, _3); } + // CHECK-LABEL: @xvmadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmadd_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmadd_h(_1, _2, _3); } + // CHECK-LABEL: @xvmadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmadd_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmadd_w(_1, _2, _3); } + // CHECK-LABEL: @xvmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> 
[[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmadd_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmadd_d(_1, _2, _3); } + // CHECK-LABEL: @xvmsub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) ++// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmsub_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmsub_b(_1, _2, _3); } + // CHECK-LABEL: @xvmsub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmsub_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmsub_h(_1, _2, _3); } + // CHECK-LABEL: @xvmsub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmsub_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmsub_w(_1, _2, _3); } + // CHECK-LABEL: @xvmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x 
i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmsub_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmsub_d(_1, _2, _3); } + // CHECK-LABEL: @xvdiv_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvdiv_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvdiv_b(_1, _2); } + // CHECK-LABEL: @xvdiv_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvdiv_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvdiv_h(_1, _2); } + // CHECK-LABEL: @xvdiv_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvdiv_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvdiv_w(_1, _2); } + // CHECK-LABEL: @xvdiv_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvdiv_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvdiv_d(_1, _2); } + // CHECK-LABEL: @xvdiv_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> 
@llvm.loongarch.lasx.xvdiv.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvdiv_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvdiv_bu(_1, _2); } + // CHECK-LABEL: @xvdiv_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvdiv_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvdiv_hu(_1, _2); } + // CHECK-LABEL: @xvdiv_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvdiv_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvdiv_wu(_1, _2); } + // CHECK-LABEL: @xvdiv_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvdiv_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvdiv_du(_1, _2); } + // CHECK-LABEL: @xvhaddw_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvhaddw_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvhaddw_h_b(_1, _2); } + // CHECK-LABEL: @xvhaddw_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x 
i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvhaddw_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvhaddw_w_h(_1, _2); } + // CHECK-LABEL: @xvhaddw_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvhaddw_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvhaddw_d_w(_1, _2); } + // CHECK-LABEL: @xvhaddw_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvhaddw_hu_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvhaddw_hu_bu(_1, _2); } + // CHECK-LABEL: @xvhaddw_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvhaddw_wu_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvhaddw_wu_hu(_1, _2); } + // CHECK-LABEL: @xvhaddw_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvhaddw_du_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvhaddw_du_wu(_1, _2); } + // CHECK-LABEL: @xvhsubw_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> [[_1:%.*]], <32 x i8> 
[[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvhsubw_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvhsubw_h_b(_1, _2); } + // CHECK-LABEL: @xvhsubw_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvhsubw_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvhsubw_w_h(_1, _2); } + // CHECK-LABEL: @xvhsubw_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvhsubw_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvhsubw_d_w(_1, _2); } + // CHECK-LABEL: @xvhsubw_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvhsubw_hu_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvhsubw_hu_bu(_1, _2); } + // CHECK-LABEL: @xvhsubw_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvhsubw_wu_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvhsubw_wu_hu(_1, _2); 
} + // CHECK-LABEL: @xvhsubw_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvhsubw_du_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvhsubw_du_wu(_1, _2); } + // CHECK-LABEL: @xvmod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmod_b(_1, _2); } + // CHECK-LABEL: @xvmod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmod_h(_1, _2); } + // CHECK-LABEL: @xvmod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmod_w(_1, _2); } + // CHECK-LABEL: @xvmod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + 
v4i64 xvmod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmod_d(_1, _2); } + // CHECK-LABEL: @xvmod_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvmod_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmod_bu(_1, _2); } + // CHECK-LABEL: @xvmod_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvmod_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmod_hu(_1, _2); } + // CHECK-LABEL: @xvmod_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvmod_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmod_wu(_1, _2); } + // CHECK-LABEL: @xvmod_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmod_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmod_du(_1, _2); } + // CHECK-LABEL: @xvrepl128vei_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvrepl128vei_b(v32i8 _1) 
{ return __builtin_lasx_xvrepl128vei_b(_1, 1); } + // CHECK-LABEL: @xvrepl128vei_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvrepl128vei_h(v16i16 _1) { return __builtin_lasx_xvrepl128vei_h(_1, 1); } + // CHECK-LABEL: @xvrepl128vei_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvrepl128vei_w(v8i32 _1) { return __builtin_lasx_xvrepl128vei_w(_1, 1); } + // CHECK-LABEL: @xvrepl128vei_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvrepl128vei_d(v4i64 _1) { return __builtin_lasx_xvrepl128vei_d(_1, 1); } + // CHECK-LABEL: @xvpickev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvpickev_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpickev_b(_1, _2); } + // CHECK-LABEL: @xvpickev_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvpickev_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpickev_h(_1, _2); } + // CHECK-LABEL: @xvpickev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x 
i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvpickev_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpickev_w(_1, _2); } + // CHECK-LABEL: @xvpickev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvpickev_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpickev_d(_1, _2); } + // CHECK-LABEL: @xvpickod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvpickod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpickod_b(_1, _2); } + // CHECK-LABEL: @xvpickod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvpickod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpickod_h(_1, _2); } + // CHECK-LABEL: @xvpickod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvpickod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpickod_w(_1, _2); } + // CHECK-LABEL: 
@xvpickod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvpickod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpickod_d(_1, _2); } + // CHECK-LABEL: @xvilvh_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvilvh_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvilvh_b(_1, _2); } + // CHECK-LABEL: @xvilvh_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvilvh_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvilvh_h(_1, _2); } + // CHECK-LABEL: @xvilvh_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvilvh_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvilvh_w(_1, _2); } + // CHECK-LABEL: @xvilvh_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 
xvilvh_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvilvh_d(_1, _2); } + // CHECK-LABEL: @xvilvl_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvilvl_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvilvl_b(_1, _2); } + // CHECK-LABEL: @xvilvl_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvilvl_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvilvl_h(_1, _2); } + // CHECK-LABEL: @xvilvl_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvilvl_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvilvl_w(_1, _2); } + // CHECK-LABEL: @xvilvl_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvilvl_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvilvl_d(_1, _2); } + // CHECK-LABEL: @xvpackev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr 
[[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvpackev_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpackev_b(_1, _2); } + // CHECK-LABEL: @xvpackev_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvpackev_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpackev_h(_1, _2); } + // CHECK-LABEL: @xvpackev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvpackev_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpackev_w(_1, _2); } + // CHECK-LABEL: @xvpackev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvpackev_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpackev_d(_1, _2); } + // CHECK-LABEL: @xvpackod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvpackod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpackod_b(_1, _2); } + // CHECK-LABEL: @xvpackod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call 
<16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvpackod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpackod_h(_1, _2); } + // CHECK-LABEL: @xvpackod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvpackod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpackod_w(_1, _2); } + // CHECK-LABEL: @xvpackod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvpackod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpackod_d(_1, _2); } + // CHECK-LABEL: @xvshuf_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) ++// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvshuf_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvshuf_b(_1, _2, _3); } + // CHECK-LABEL: @xvshuf_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvshuf_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvshuf_h(_1, _2, _3); } + // CHECK-LABEL: @xvshuf_w( + // CHECK-NEXT: 
entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvshuf_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvshuf_w(_1, _2, _3); } + // CHECK-LABEL: @xvshuf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvshuf_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvshuf_d(_1, _2, _3); } + // CHECK-LABEL: @xvand_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvand_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvand_v(_1, _2); } + // CHECK-LABEL: @xvandi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvandi_b(v32u8 _1) { return __builtin_lasx_xvandi_b(_1, 1); } + // CHECK-LABEL: @xvor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], 
ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvor_v(_1, _2); } + // CHECK-LABEL: @xvori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvori_b(v32u8 _1) { return __builtin_lasx_xvori_b(_1, 1); } + // CHECK-LABEL: @xvnor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvnor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvnor_v(_1, _2); } + // CHECK-LABEL: @xvnori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvnori_b(v32u8 _1) { return __builtin_lasx_xvnori_b(_1, 1); } + // CHECK-LABEL: @xvxor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvxor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvxor_v(_1, _2); } + // CHECK-LABEL: @xvxori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvxori_b(v32u8 _1) { return __builtin_lasx_xvxori_b(_1, 1); } + // CHECK-LABEL: @xvbitsel_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], 
<32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) ++// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvbitsel_v(v32u8 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvbitsel_v(_1, _2, _3); } + // CHECK-LABEL: @xvbitseli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvbitseli_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitseli_b(_1, _2, 1); } + // CHECK-LABEL: @xvshuf4i_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvshuf4i_b(v32i8 _1) { return __builtin_lasx_xvshuf4i_b(_1, 1); } + // CHECK-LABEL: @xvshuf4i_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvshuf4i_h(v16i16 _1) { return __builtin_lasx_xvshuf4i_h(_1, 1); } + // CHECK-LABEL: @xvshuf4i_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvshuf4i_w(v8i32 _1) { return __builtin_lasx_xvshuf4i_w(_1, 1); } + // CHECK-LABEL: @xvreplgr2vr_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa 
[[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvreplgr2vr_b(int _1) { return __builtin_lasx_xvreplgr2vr_b(_1); } + // CHECK-LABEL: @xvreplgr2vr_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvreplgr2vr_h(int _1) { return __builtin_lasx_xvreplgr2vr_h(_1); } + // CHECK-LABEL: @xvreplgr2vr_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvreplgr2vr_w(int _1) { return __builtin_lasx_xvreplgr2vr_w(_1); } + // CHECK-LABEL: @xvreplgr2vr_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[CONV:%.*]] = sext i32 [[_1:%.*]] to i64 + // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 [[CONV]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvreplgr2vr_d(int _1) { return __builtin_lasx_xvreplgr2vr_d(_1); } + // CHECK-LABEL: @xvpcnt_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> [[_1]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvpcnt_b(v32i8 _1) { return __builtin_lasx_xvpcnt_b(_1); } + // CHECK-LABEL: @xvpcnt_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> [[_1]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvpcnt_h(v16i16 _1) { return __builtin_lasx_xvpcnt_h(_1); } + // CHECK-LABEL: @xvpcnt_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvpcnt_w(v8i32 _1) { return __builtin_lasx_xvpcnt_w(_1); } + // CHECK-LABEL: @xvpcnt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr 
[[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvpcnt_d(v4i64 _1) { return __builtin_lasx_xvpcnt_d(_1); } + // CHECK-LABEL: @xvclo_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> [[_1]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvclo_b(v32i8 _1) { return __builtin_lasx_xvclo_b(_1); } + // CHECK-LABEL: @xvclo_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> [[_1]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvclo_h(v16i16 _1) { return __builtin_lasx_xvclo_h(_1); } + // CHECK-LABEL: @xvclo_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvclo_w(v8i32 _1) { return __builtin_lasx_xvclo_w(_1); } + // CHECK-LABEL: @xvclo_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvclo_d(v4i64 _1) { return __builtin_lasx_xvclo_d(_1); } + // CHECK-LABEL: @xvclz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> [[_1]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvclz_b(v32i8 _1) { return __builtin_lasx_xvclz_b(_1); } + // CHECK-LABEL: @xvclz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> [[_1]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 
xvclz_h(v16i16 _1) { return __builtin_lasx_xvclz_h(_1); } + // CHECK-LABEL: @xvclz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvclz_w(v8i32 _1) { return __builtin_lasx_xvclz_w(_1); } + // CHECK-LABEL: @xvclz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvclz_d(v4i64 _1) { return __builtin_lasx_xvclz_d(_1); } + // CHECK-LABEL: @xvfadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfadd_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfadd_s(_1, _2); } + // CHECK-LABEL: @xvfadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfadd_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfadd_d(_1, _2); } + // CHECK-LABEL: @xvfsub_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfsub_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfsub_s(_1, _2); } + // CHECK-LABEL: @xvfsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> 
@llvm.loongarch.lasx.xvfsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfsub_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfsub_d(_1, _2); } + // CHECK-LABEL: @xvfmul_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfmul_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmul_s(_1, _2); } + // CHECK-LABEL: @xvfmul_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfmul_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmul_d(_1, _2); } + // CHECK-LABEL: @xvfdiv_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfdiv_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfdiv_s(_1, _2); } + // CHECK-LABEL: @xvfdiv_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + 
v4f64 xvfdiv_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfdiv_d(_1, _2); } + // CHECK-LABEL: @xvfcvt_h_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvfcvt_h_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcvt_h_s(_1, _2); } + // CHECK-LABEL: @xvfcvt_s_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfcvt_s_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcvt_s_d(_1, _2); } + // CHECK-LABEL: @xvfmin_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfmin_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmin_s(_1, _2); } + // CHECK-LABEL: @xvfmin_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfmin_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmin_d(_1, _2); } + // CHECK-LABEL: @xvfmina_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> 
@llvm.loongarch.lasx.xvfmina.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfmina_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmina_s(_1, _2); } + // CHECK-LABEL: @xvfmina_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfmina_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmina_d(_1, _2); } + // CHECK-LABEL: @xvfmax_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfmax_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmax_s(_1, _2); } + // CHECK-LABEL: @xvfmax_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfmax_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmax_d(_1, _2); } + // CHECK-LABEL: @xvfmaxa_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfmaxa_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmaxa_s(_1, _2); } + // CHECK-LABEL: @xvfmaxa_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x 
double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfmaxa_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmaxa_d(_1, _2); } + // CHECK-LABEL: @xvfclass_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfclass_s(v8f32 _1) { return __builtin_lasx_xvfclass_s(_1); } + // CHECK-LABEL: @xvfclass_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfclass_d(v4f64 _1) { return __builtin_lasx_xvfclass_d(_1); } + // CHECK-LABEL: @xvfsqrt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfsqrt_s(v8f32 _1) { return __builtin_lasx_xvfsqrt_s(_1); } + // CHECK-LABEL: @xvfsqrt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfsqrt_d(v4f64 _1) { return __builtin_lasx_xvfsqrt_d(_1); } + // CHECK-LABEL: @xvfrecip_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfrecip_s(v8f32 _1) { return __builtin_lasx_xvfrecip_s(_1); } + // CHECK-LABEL: @xvfrecip_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfrecip_d(v4f64 _1) { return __builtin_lasx_xvfrecip_d(_1); } + // CHECK-LABEL: @xvfrint_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfrint_s(v8f32 _1) { return __builtin_lasx_xvfrint_s(_1); } + // CHECK-LABEL: @xvfrint_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfrint_d(v4f64 _1) { return __builtin_lasx_xvfrint_d(_1); } + // CHECK-LABEL: @xvfrsqrt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfrsqrt_s(v8f32 _1) { return __builtin_lasx_xvfrsqrt_s(_1); } + // CHECK-LABEL: @xvfrsqrt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfrsqrt_d(v4f64 _1) { return __builtin_lasx_xvfrsqrt_d(_1); } + // CHECK-LABEL: @xvflogb_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvflogb_s(v8f32 _1) { return __builtin_lasx_xvflogb_s(_1); } + // 
CHECK-LABEL: @xvflogb_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvflogb_d(v4f64 _1) { return __builtin_lasx_xvflogb_d(_1); } + // CHECK-LABEL: @xvfcvth_s_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfcvth_s_h(v16i16 _1) { return __builtin_lasx_xvfcvth_s_h(_1); } + // CHECK-LABEL: @xvfcvth_d_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfcvth_d_s(v8f32 _1) { return __builtin_lasx_xvfcvth_d_s(_1); } + // CHECK-LABEL: @xvfcvtl_s_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfcvtl_s_h(v16i16 _1) { return __builtin_lasx_xvfcvtl_s_h(_1); } + // CHECK-LABEL: @xvfcvtl_d_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfcvtl_d_s(v8f32 _1) { return __builtin_lasx_xvfcvtl_d_s(_1); } + // CHECK-LABEL: @xvftint_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + 
// + v8i32 xvftint_w_s(v8f32 _1) { return __builtin_lasx_xvftint_w_s(_1); } + // CHECK-LABEL: @xvftint_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftint_l_d(v4f64 _1) { return __builtin_lasx_xvftint_l_d(_1); } + // CHECK-LABEL: @xvftint_wu_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvftint_wu_s(v8f32 _1) { return __builtin_lasx_xvftint_wu_s(_1); } + // CHECK-LABEL: @xvftint_lu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvftint_lu_d(v4f64 _1) { return __builtin_lasx_xvftint_lu_d(_1); } + // CHECK-LABEL: @xvftintrz_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftintrz_w_s(v8f32 _1) { return __builtin_lasx_xvftintrz_w_s(_1); } + // CHECK-LABEL: @xvftintrz_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrz_l_d(v4f64 _1) { return __builtin_lasx_xvftintrz_l_d(_1); } + // CHECK-LABEL: @xvftintrz_wu_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x i32> 
[[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvftintrz_wu_s(v8f32 _1) { return __builtin_lasx_xvftintrz_wu_s(_1); } + // CHECK-LABEL: @xvftintrz_lu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvftintrz_lu_d(v4f64 _1) { return __builtin_lasx_xvftintrz_lu_d(_1); } + // CHECK-LABEL: @xvffint_s_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvffint_s_w(v8i32 _1) { return __builtin_lasx_xvffint_s_w(_1); } + // CHECK-LABEL: @xvffint_d_l( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvffint_d_l(v4i64 _1) { return __builtin_lasx_xvffint_d_l(_1); } + // CHECK-LABEL: @xvffint_s_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvffint_s_wu(v8u32 _1) { return __builtin_lasx_xvffint_s_wu(_1); } + // CHECK-LABEL: @xvffint_d_lu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvffint_d_lu(v4u64 _1) { return __builtin_lasx_xvffint_d_lu(_1); } + // CHECK-LABEL: @xvreplve_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_1:%.*]], i32 [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail 
call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_1]], i32 [[_2:%.*]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvreplve_b(v32i8 _1, int _2) { return __builtin_lasx_xvreplve_b(_1, _2); } + // CHECK-LABEL: @xvreplve_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_1:%.*]], i32 [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_1]], i32 [[_2:%.*]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvreplve_h(v16i16 _1, int _2) { return __builtin_lasx_xvreplve_h(_1, _2); } + // CHECK-LABEL: @xvreplve_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_1:%.*]], i32 [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_1]], i32 [[_2:%.*]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvreplve_w(v8i32 _1, int _2) { return __builtin_lasx_xvreplve_w(_1, _2); } + // CHECK-LABEL: @xvreplve_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1:%.*]], i32 [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1]], i32 [[_2:%.*]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvreplve_d(v4i64 _1, int _2) { return __builtin_lasx_xvreplve_d(_1, _2); } + // CHECK-LABEL: @xvpermi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvpermi_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpermi_w(_1, _2, 1); } + // CHECK-LABEL: @xvandn_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// 
CHECK-NEXT: ret void + // + v32u8 xvandn_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvandn_v(_1, _2); } + // CHECK-LABEL: @xvneg_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> [[_1]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvneg_b(v32i8 _1) { return __builtin_lasx_xvneg_b(_1); } + // CHECK-LABEL: @xvneg_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> [[_1]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvneg_h(v16i16 _1) { return __builtin_lasx_xvneg_h(_1); } + // CHECK-LABEL: @xvneg_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvneg_w(v8i32 _1) { return __builtin_lasx_xvneg_w(_1); } + // CHECK-LABEL: @xvneg_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvneg_d(v4i64 _1) { return __builtin_lasx_xvneg_d(_1); } + // CHECK-LABEL: @xvmuh_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmuh_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmuh_b(_1, _2); } + // CHECK-LABEL: @xvmuh_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: 
[[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmuh_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmuh_h(_1, _2); } + // CHECK-LABEL: @xvmuh_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmuh_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmuh_w(_1, _2); } + // CHECK-LABEL: @xvmuh_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmuh_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmuh_d(_1, _2); } + // CHECK-LABEL: @xvmuh_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvmuh_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmuh_bu(_1, _2); } + // CHECK-LABEL: @xvmuh_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvmuh_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmuh_hu(_1, _2); } + // CHECK-LABEL: @xvmuh_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x 
i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvmuh_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmuh_wu(_1, _2); } + // CHECK-LABEL: @xvmuh_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmuh_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmuh_du(_1, _2); } + // CHECK-LABEL: @xvsllwil_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsllwil_h_b(v32i8 _1) { return __builtin_lasx_xvsllwil_h_b(_1, 1); } + // CHECK-LABEL: @xvsllwil_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsllwil_w_h(v16i16 _1) { return __builtin_lasx_xvsllwil_w_h(_1, 1); } + // CHECK-LABEL: @xvsllwil_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsllwil_d_w(v8i32 _1) { return __builtin_lasx_xvsllwil_d_w(_1, 1); } + // CHECK-LABEL: @xvsllwil_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvsllwil_hu_bu(v32u8 _1) { return 
__builtin_lasx_xvsllwil_hu_bu(_1, 1); } + // CHECK-LABEL: @xvsllwil_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvsllwil_wu_hu(v16u16 _1) { return __builtin_lasx_xvsllwil_wu_hu(_1, 1); } + // CHECK-LABEL: @xvsllwil_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvsllwil_du_wu(v8u32 _1) { return __builtin_lasx_xvsllwil_du_wu(_1, 1); } + // CHECK-LABEL: @xvsran_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsran_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsran_b_h(_1, _2); } + // CHECK-LABEL: @xvsran_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsran_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsran_h_w(_1, _2); } + // CHECK-LABEL: @xvsran_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsran_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsran_w_d(_1, _2); } + // CHECK-LABEL: @xvssran_b_h( + // CHECK-NEXT: 
entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssran_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssran_b_h(_1, _2); } + // CHECK-LABEL: @xvssran_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssran_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssran_h_w(_1, _2); } + // CHECK-LABEL: @xvssran_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssran_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssran_w_d(_1, _2); } + // CHECK-LABEL: @xvssran_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvssran_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssran_bu_h(_1, _2); } + // CHECK-LABEL: @xvssran_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] 
++// CHECK-NEXT: ret void + // + v16u16 xvssran_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssran_hu_w(_1, _2); } + // CHECK-LABEL: @xvssran_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssran_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssran_wu_d(_1, _2); } + // CHECK-LABEL: @xvsrarn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrarn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrarn_b_h(_1, _2); } + // CHECK-LABEL: @xvsrarn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrarn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrarn_h_w(_1, _2); } + // CHECK-LABEL: @xvsrarn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrarn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrarn_w_d(_1, _2); } + // CHECK-LABEL: @xvssrarn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> 
@llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssrarn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrarn_b_h(_1, _2); } + // CHECK-LABEL: @xvssrarn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssrarn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrarn_h_w(_1, _2); } + // CHECK-LABEL: @xvssrarn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssrarn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrarn_w_d(_1, _2); } + // CHECK-LABEL: @xvssrarn_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvssrarn_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrarn_bu_h(_1, _2); } + // CHECK-LABEL: @xvssrarn_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvssrarn_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrarn_hu_w(_1, _2); } + // CHECK-LABEL: @xvssrarn_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x 
i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssrarn_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrarn_wu_d(_1, _2); } + // CHECK-LABEL: @xvsrln_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrln_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrln_b_h(_1, _2); } + // CHECK-LABEL: @xvsrln_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrln_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrln_h_w(_1, _2); } + // CHECK-LABEL: @xvsrln_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrln_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrln_w_d(_1, _2); } + // CHECK-LABEL: @xvssrln_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvssrln_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrln_bu_h(_1, _2); } + // CHECK-LABEL: @xvssrln_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> 
@llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvssrln_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrln_hu_w(_1, _2); } + // CHECK-LABEL: @xvssrln_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssrln_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrln_wu_d(_1, _2); } + // CHECK-LABEL: @xvsrlrn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrlrn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlrn_b_h(_1, _2); } + // CHECK-LABEL: @xvsrlrn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrlrn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlrn_h_w(_1, _2); } + // CHECK-LABEL: @xvsrlrn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrlrn_w_d(v4i64 _1, v4i64 
_2) { return __builtin_lasx_xvsrlrn_w_d(_1, _2); } + // CHECK-LABEL: @xvssrlrn_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvssrlrn_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrlrn_bu_h(_1, _2); } + // CHECK-LABEL: @xvssrlrn_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvssrlrn_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrlrn_hu_w(_1, _2); } + // CHECK-LABEL: @xvssrlrn_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssrlrn_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrlrn_wu_d(_1, _2); } + // CHECK-LABEL: @xvfrstpi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvfrstpi_b(_1, _2, 1); } + // CHECK-LABEL: @xvfrstpi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> 
@llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvfrstpi_h(_1, _2, 1); } + // CHECK-LABEL: @xvfrstp_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) ++// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvfrstp_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvfrstp_b(_1, _2, _3); } + // CHECK-LABEL: @xvfrstp_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvfrstp_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvfrstp_h(_1, _2, _3); } + // CHECK-LABEL: @xvshuf4i_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvshuf4i_d(_1, _2, 1); } + // CHECK-LABEL: @xvbsrl_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvbsrl_v(v32i8 _1) { return __builtin_lasx_xvbsrl_v(_1, 1); } + // CHECK-LABEL: @xvbsll_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> [[_1:%.*]], 
i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvbsll_v(v32i8 _1) { return __builtin_lasx_xvbsll_v(_1, 1); } + // CHECK-LABEL: @xvextrins_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvextrins_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvextrins_b(_1, _2, 1); } + // CHECK-LABEL: @xvextrins_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvextrins_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvextrins_h(_1, _2, 1); } + // CHECK-LABEL: @xvextrins_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvextrins_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvextrins_w(_1, _2, 1); } + // CHECK-LABEL: @xvextrins_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvextrins_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvextrins_d(_1, _2, 1); } + // CHECK-LABEL: @xvmskltz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = 
tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> [[_1]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmskltz_b(v32i8 _1) { return __builtin_lasx_xvmskltz_b(_1); } + // CHECK-LABEL: @xvmskltz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> [[_1]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmskltz_h(v16i16 _1) { return __builtin_lasx_xvmskltz_h(_1); } + // CHECK-LABEL: @xvmskltz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmskltz_w(v8i32 _1) { return __builtin_lasx_xvmskltz_w(_1); } + // CHECK-LABEL: @xvmskltz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmskltz_d(v4i64 _1) { return __builtin_lasx_xvmskltz_d(_1); } + // CHECK-LABEL: @xvsigncov_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsigncov_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsigncov_b(_1, _2); } + // CHECK-LABEL: @xvsigncov_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// 
CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsigncov_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsigncov_h(_1, _2); } + // CHECK-LABEL: @xvsigncov_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsigncov_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsigncov_w(_1, _2); } + // CHECK-LABEL: @xvsigncov_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsigncov_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsigncov_d(_1, _2); } + // CHECK-LABEL: @xvfmadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) ++// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfmadd_s(_1, _2, _3); } + // CHECK-LABEL: @xvfmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) ++// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfmadd_d(_1, _2, _3); } + // CHECK-LABEL: @xvfmsub_s( + // CHECK-NEXT: entry: +-// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) ++// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfmsub_s(_1, _2, _3); } + // CHECK-LABEL: @xvfmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) ++// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfmsub_d(_1, _2, _3); } + // CHECK-LABEL: @xvfnmadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) ++// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfnmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfnmadd_s(_1, _2, _3); } + // CHECK-LABEL: @xvfnmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) ++// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfnmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfnmadd_d(_1, 
_2, _3); } + // CHECK-LABEL: @xvfnmsub_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) ++// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfnmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfnmsub_s(_1, _2, _3); } + // CHECK-LABEL: @xvfnmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) ++// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfnmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfnmsub_d(_1, _2, _3); } + // CHECK-LABEL: @xvftintrne_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftintrne_w_s(v8f32 _1) { return __builtin_lasx_xvftintrne_w_s(_1); } + // CHECK-LABEL: @xvftintrne_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrne_l_d(v4f64 _1) { return __builtin_lasx_xvftintrne_l_d(_1); } + // CHECK-LABEL: @xvftintrp_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa 
[[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftintrp_w_s(v8f32 _1) { return __builtin_lasx_xvftintrp_w_s(_1); } + // CHECK-LABEL: @xvftintrp_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrp_l_d(v4f64 _1) { return __builtin_lasx_xvftintrp_l_d(_1); } + // CHECK-LABEL: @xvftintrm_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftintrm_w_s(v8f32 _1) { return __builtin_lasx_xvftintrm_w_s(_1); } + // CHECK-LABEL: @xvftintrm_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrm_l_d(v4f64 _1) { return __builtin_lasx_xvftintrm_l_d(_1); } + // CHECK-LABEL: @xvftint_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftint_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftint_w_d(_1, _2); } + // CHECK-LABEL: @xvffint_s_l( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvffint_s_l(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvffint_s_l(_1, _2); } + // CHECK-LABEL: @xvftintrz_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 
x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftintrz_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrz_w_d(_1, _2); } + // CHECK-LABEL: @xvftintrp_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftintrp_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrp_w_d(_1, _2); } + // CHECK-LABEL: @xvftintrm_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftintrm_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrm_w_d(_1, _2); } + // CHECK-LABEL: @xvftintrne_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftintrne_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrne_w_d(_1, _2); } + // CHECK-LABEL: @xvftinth_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftinth_l_s(v8f32 _1) { return 
__builtin_lasx_xvftinth_l_s(_1); } + // CHECK-LABEL: @xvftintl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintl_l_s(v8f32 _1) { return __builtin_lasx_xvftintl_l_s(_1); } + // CHECK-LABEL: @xvffinth_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvffinth_d_w(v8i32 _1) { return __builtin_lasx_xvffinth_d_w(_1); } + // CHECK-LABEL: @xvffintl_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvffintl_d_w(v8i32 _1) { return __builtin_lasx_xvffintl_d_w(_1); } + // CHECK-LABEL: @xvftintrzh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrzh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrzh_l_s(_1); } + // CHECK-LABEL: @xvftintrzl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrzl_l_s(v8f32 _1) { return __builtin_lasx_xvftintrzl_l_s(_1); } + // CHECK-LABEL: @xvftintrph_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr 
[[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrph_l_s(v8f32 _1) { return __builtin_lasx_xvftintrph_l_s(_1); } + // CHECK-LABEL: @xvftintrpl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrpl_l_s(v8f32 _1) { return __builtin_lasx_xvftintrpl_l_s(_1); } + // CHECK-LABEL: @xvftintrmh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrmh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrmh_l_s(_1); } + // CHECK-LABEL: @xvftintrml_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrml_l_s(v8f32 _1) { return __builtin_lasx_xvftintrml_l_s(_1); } + // CHECK-LABEL: @xvftintrneh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrneh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrneh_l_s(_1); } + // CHECK-LABEL: @xvftintrnel_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrnel_l_s(v8f32 _1) { return __builtin_lasx_xvftintrnel_l_s(_1); } + // CHECK-LABEL: @xvfrintrne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> +-// CHECK-NEXT: ret <8 x i32> [[TMP1]] ++// CHECK-NEXT: [[_1:%.*]] = load 
<8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfrintrne_s(v8f32 _1) { return __builtin_lasx_xvfrintrne_s(_1); } + // CHECK-LABEL: @xvfrintrne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> +-// CHECK-NEXT: ret <4 x i64> [[TMP1]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfrintrne_d(v4f64 _1) { return __builtin_lasx_xvfrintrne_d(_1); } + // CHECK-LABEL: @xvfrintrz_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> +-// CHECK-NEXT: ret <8 x i32> [[TMP1]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfrintrz_s(v8f32 _1) { return __builtin_lasx_xvfrintrz_s(_1); } + // CHECK-LABEL: @xvfrintrz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> +-// CHECK-NEXT: ret <4 x i64> [[TMP1]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfrintrz_d(v4f64 _1) { return __builtin_lasx_xvfrintrz_d(_1); } + // CHECK-LABEL: @xvfrintrp_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> +-// CHECK-NEXT: ret <8 x i32> [[TMP1]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfrintrp_s(v8f32 _1) { return __builtin_lasx_xvfrintrp_s(_1); } + // CHECK-LABEL: @xvfrintrp_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> +-// CHECK-NEXT: ret <4 x i64> [[TMP1]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> 
@llvm.loongarch.lasx.xvfrintrp.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfrintrp_d(v4f64 _1) { return __builtin_lasx_xvfrintrp_d(_1); } + // CHECK-LABEL: @xvfrintrm_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> +-// CHECK-NEXT: ret <8 x i32> [[TMP1]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfrintrm_s(v8f32 _1) { return __builtin_lasx_xvfrintrm_s(_1); } + // CHECK-LABEL: @xvfrintrm_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> +-// CHECK-NEXT: ret <4 x i64> [[TMP1]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfrintrm_d(v4f64 _1) { return __builtin_lasx_xvfrintrm_d(_1); } + // CHECK-LABEL: @xvld( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvld(void *_1) { return __builtin_lasx_xvld(_1, 1); } + // CHECK-LABEL: @xvst( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1) ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1) + // CHECK-NEXT: ret void + // + void xvst(v32i8 _1, void *_2) { return __builtin_lasx_xvst(_1, _2, 1); } + // CHECK-LABEL: @xvstelm_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1, i32 1) ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1, i32 1) + // CHECK-NEXT: ret void + // + void xvstelm_b(v32i8 _1, void * _2) { return __builtin_lasx_xvstelm_b(_1, _2, 1, 1); } + // CHECK-LABEL: @xvstelm_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1:%.*]], ptr [[_2:%.*]], i32 2, i32 1) ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1]], ptr [[_2:%.*]], i32 2, i32 1) + // CHECK-NEXT: ret void + // + void xvstelm_h(v16i16 _1, void * _2) { return __builtin_lasx_xvstelm_h(_1, _2, 2, 1); } + // CHECK-LABEL: @xvstelm_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> 
[[_1:%.*]], ptr [[_2:%.*]], i32 4, i32 1) ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1]], ptr [[_2:%.*]], i32 4, i32 1) + // CHECK-NEXT: ret void + // + void xvstelm_w(v8i32 _1, void * _2) { return __builtin_lasx_xvstelm_w(_1, _2, 4, 1); } + // CHECK-LABEL: @xvstelm_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1:%.*]], ptr [[_2:%.*]], i32 8, i32 1) ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1]], ptr [[_2:%.*]], i32 8, i32 1) + // CHECK-NEXT: ret void + // + void xvstelm_d(v4i64 _1, void * _2) { return __builtin_lasx_xvstelm_d(_1, _2, 8, 1); } + // CHECK-LABEL: @xvinsve0_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvinsve0_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvinsve0_w(_1, _2, 1); } + // CHECK-LABEL: @xvinsve0_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvinsve0_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvinsve0_d(_1, _2, 1); } + // CHECK-LABEL: @xvpickve_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvpickve_w(v8i32 _1) { return __builtin_lasx_xvpickve_w(_1, 1); } + // CHECK-LABEL: @xvpickve_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvpickve_d(v4i64 _1) { return __builtin_lasx_xvpickve_d(_1, 
1); } + // CHECK-LABEL: @xvssrlrn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssrlrn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrn_b_h(_1, _2); } + // CHECK-LABEL: @xvssrlrn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssrlrn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrn_h_w(_1, _2); } + // CHECK-LABEL: @xvssrlrn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssrlrn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrn_w_d(_1, _2); } + // CHECK-LABEL: @xvssrln_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssrln_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrln_b_h(_1, _2); } + // CHECK-LABEL: @xvssrln_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <16 x i16> 
[[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssrln_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrln_h_w(_1, _2); } + // CHECK-LABEL: @xvssrln_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssrln_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrln_w_d(_1, _2); } + // CHECK-LABEL: @xvorn_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvorn_v(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvorn_v(_1, _2); } + // CHECK-LABEL: @xvldi( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvldi() { return __builtin_lasx_xvldi(1); } + // CHECK-LABEL: @xvldx( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1:%.*]], i64 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvldx(void *_1) { return __builtin_lasx_xvldx(_1, 1); } + // CHECK-LABEL: @xvstx( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i64 1) ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_1]], ptr [[_2:%.*]], i64 1) + // CHECK-NEXT: ret void + // + void xvstx(v32i8 _1, void *_2) { return __builtin_lasx_xvstx(_1, _2, 1); } + // CHECK-LABEL: @xvextl_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvextl_qu_du(v4u64 _1) { return __builtin_lasx_xvextl_qu_du(_1); } + // CHECK-LABEL: @xvinsgr2vr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> [[_1:%.*]], i32 1, i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> [[_1]], i32 1, i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvinsgr2vr_w(v8i32 _1) { return __builtin_lasx_xvinsgr2vr_w(_1, 1, 1); } + // CHECK-LABEL: @xvinsgr2vr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> [[_1:%.*]], i64 1, i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> [[_1]], i64 1, i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvinsgr2vr_d(v4i64 _1) { return __builtin_lasx_xvinsgr2vr_d(_1, 1, 1); } + // CHECK-LABEL: @xvreplve0_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> [[_1]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvreplve0_b(v32i8 _1) { return __builtin_lasx_xvreplve0_b(_1); } + // CHECK-LABEL: @xvreplve0_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> [[_1]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvreplve0_h(v16i16 _1) { return __builtin_lasx_xvreplve0_h(_1); } + // CHECK-LABEL: @xvreplve0_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvreplve0_w(v8i32 _1) { return __builtin_lasx_xvreplve0_w(_1); } + // CHECK-LABEL: @xvreplve0_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvreplve0_d(v4i64 _1) { return __builtin_lasx_xvreplve0_d(_1); } + // CHECK-LABEL: 
@xvreplve0_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> [[_1]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvreplve0_q(v32i8 _1) { return __builtin_lasx_xvreplve0_q(_1); } + // CHECK-LABEL: @vext2xv_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> [[_1]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 vext2xv_h_b(v32i8 _1) { return __builtin_lasx_vext2xv_h_b(_1); } + // CHECK-LABEL: @vext2xv_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 vext2xv_w_h(v16i16 _1) { return __builtin_lasx_vext2xv_w_h(_1); } + // CHECK-LABEL: @vext2xv_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 vext2xv_d_w(v8i32 _1) { return __builtin_lasx_vext2xv_d_w(_1); } + // CHECK-LABEL: @vext2xv_w_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 vext2xv_w_b(v32i8 _1) { return __builtin_lasx_vext2xv_w_b(_1); } + // CHECK-LABEL: @vext2xv_d_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 vext2xv_d_h(v16i16 _1) { return __builtin_lasx_vext2xv_d_h(_1); } + 
// CHECK-LABEL: @vext2xv_d_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 vext2xv_d_b(v32i8 _1) { return __builtin_lasx_vext2xv_d_b(_1); } + // CHECK-LABEL: @vext2xv_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_1]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 vext2xv_hu_bu(v32i8 _1) { return __builtin_lasx_vext2xv_hu_bu(_1); } + // CHECK-LABEL: @vext2xv_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 vext2xv_wu_hu(v16i16 _1) { return __builtin_lasx_vext2xv_wu_hu(_1); } + // CHECK-LABEL: @vext2xv_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 vext2xv_du_wu(v8i32 _1) { return __builtin_lasx_vext2xv_du_wu(_1); } + // CHECK-LABEL: @vext2xv_wu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 vext2xv_wu_bu(v32i8 _1) { return __builtin_lasx_vext2xv_wu_bu(_1); } + // CHECK-LABEL: @vext2xv_du_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 
vext2xv_du_hu(v16i16 _1) { return __builtin_lasx_vext2xv_du_hu(_1); } + // CHECK-LABEL: @vext2xv_du_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 vext2xv_du_bu(v32i8 _1) { return __builtin_lasx_vext2xv_du_bu(_1); } + // CHECK-LABEL: @xvpermi_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvpermi_q(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpermi_q(_1, _2, 1); } + // CHECK-LABEL: @xvpermi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvpermi_d(v4i64 _1) { return __builtin_lasx_xvpermi_d(_1, 1); } + // CHECK-LABEL: @xvperm_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvperm_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvperm_w(_1, _2); } + // CHECK-LABEL: @xvldrepl_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvldrepl_b(void *_1) { return __builtin_lasx_xvldrepl_b(_1, 1); } + // CHECK-LABEL: @xvldrepl_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr [[_1:%.*]], i32 2) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvldrepl_h(void *_1) { return __builtin_lasx_xvldrepl_h(_1, 2); } + // 
CHECK-LABEL: @xvldrepl_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr [[_1:%.*]], i32 4) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvldrepl_w(void *_1) { return __builtin_lasx_xvldrepl_w(_1, 4); } + // CHECK-LABEL: @xvldrepl_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr [[_1:%.*]], i32 8) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvldrepl_d(void *_1) { return __builtin_lasx_xvldrepl_d(_1, 8); } + // CHECK-LABEL: @xvpickve2gr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xvpickve2gr_w(v8i32 _1) { return __builtin_lasx_xvpickve2gr_w(_1, 1); } + // CHECK-LABEL: @xvpickve2gr_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + unsigned int xvpickve2gr_wu(v8i32 _1) { return __builtin_lasx_xvpickve2gr_wu(_1, 1); } + // CHECK-LABEL: @xvpickve2gr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i64 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: ret i64 [[TMP1]] + // + long xvpickve2gr_d(v4i64 _1) { return __builtin_lasx_xvpickve2gr_d(_1, 1); } + // CHECK-LABEL: @xvpickve2gr_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i64 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: ret i64 [[TMP1]] + // + unsigned long int xvpickve2gr_du(v4i64 _1) { return __builtin_lasx_xvpickve2gr_du(_1, 1); } + // CHECK-LABEL: @xvaddwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 
xvaddwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvaddwev_q_d(_1, _2); } + // CHECK-LABEL: @xvaddwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvaddwev_d_w(_1, _2); } + // CHECK-LABEL: @xvaddwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvaddwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvaddwev_w_h(_1, _2); } + // CHECK-LABEL: @xvaddwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvaddwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvaddwev_h_b(_1, _2); } + // CHECK-LABEL: @xvaddwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvaddwev_q_du(_1, _2); } + // CHECK-LABEL: @xvaddwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvaddwev_d_wu(_1, _2); } + // CHECK-LABEL: @xvaddwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvaddwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvaddwev_w_hu(_1, _2); } + // CHECK-LABEL: @xvaddwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvaddwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvaddwev_h_bu(_1, _2); } + // CHECK-LABEL: @xvsubwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsubwev_q_d(_1, _2); } + // CHECK-LABEL: @xvsubwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsubwev_d_w(_1, _2); } + // CHECK-LABEL: @xvsubwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, 
ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsubwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsubwev_w_h(_1, _2); } + // CHECK-LABEL: @xvsubwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsubwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsubwev_h_b(_1, _2); } + // CHECK-LABEL: @xvsubwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsubwev_q_du(_1, _2); } + // CHECK-LABEL: @xvsubwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsubwev_d_wu(_1, _2); } + // CHECK-LABEL: @xvsubwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsubwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsubwev_w_hu(_1, _2); } + // CHECK-LABEL: @xvsubwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsubwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsubwev_h_bu(_1, _2); } + // CHECK-LABEL: @xvmulwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmulwev_q_d(_1, _2); } + // CHECK-LABEL: @xvmulwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmulwev_d_w(_1, _2); } + // CHECK-LABEL: @xvmulwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmulwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmulwev_w_h(_1, _2); } + // CHECK-LABEL: @xvmulwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: 
ret void + // + v16i16 xvmulwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmulwev_h_b(_1, _2); } + // CHECK-LABEL: @xvmulwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmulwev_q_du(_1, _2); } + // CHECK-LABEL: @xvmulwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmulwev_d_wu(_1, _2); } + // CHECK-LABEL: @xvmulwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmulwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmulwev_w_hu(_1, _2); } + // CHECK-LABEL: @xvmulwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmulwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmulwev_h_bu(_1, _2); } + // CHECK-LABEL: @xvaddwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call 
<4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvaddwod_q_d(_1, _2); } + // CHECK-LABEL: @xvaddwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvaddwod_d_w(_1, _2); } + // CHECK-LABEL: @xvaddwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvaddwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvaddwod_w_h(_1, _2); } + // CHECK-LABEL: @xvaddwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvaddwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvaddwod_h_b(_1, _2); } + // CHECK-LABEL: @xvaddwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvaddwod_q_du(_1, _2); } + // CHECK-LABEL: @xvaddwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, 
ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvaddwod_d_wu(_1, _2); } + // CHECK-LABEL: @xvaddwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvaddwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvaddwod_w_hu(_1, _2); } + // CHECK-LABEL: @xvaddwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvaddwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvaddwod_h_bu(_1, _2); } + // CHECK-LABEL: @xvsubwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsubwod_q_d(_1, _2); } + // CHECK-LABEL: @xvsubwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsubwod_d_w(_1, _2); } + // CHECK-LABEL: @xvsubwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = 
tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsubwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsubwod_w_h(_1, _2); } + // CHECK-LABEL: @xvsubwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsubwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsubwod_h_b(_1, _2); } + // CHECK-LABEL: @xvsubwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsubwod_q_du(_1, _2); } + // CHECK-LABEL: @xvsubwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsubwod_d_wu(_1, _2); } + // CHECK-LABEL: @xvsubwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret 
void + // + v8i32 xvsubwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsubwod_w_hu(_1, _2); } + // CHECK-LABEL: @xvsubwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsubwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsubwod_h_bu(_1, _2); } + // CHECK-LABEL: @xvmulwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmulwod_q_d(_1, _2); } + // CHECK-LABEL: @xvmulwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmulwod_d_w(_1, _2); } + // CHECK-LABEL: @xvmulwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmulwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmulwod_w_h(_1, _2); } + // CHECK-LABEL: @xvmulwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> 
@llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmulwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmulwod_h_b(_1, _2); } + // CHECK-LABEL: @xvmulwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmulwod_q_du(_1, _2); } + // CHECK-LABEL: @xvmulwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmulwod_d_wu(_1, _2); } + // CHECK-LABEL: @xvmulwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmulwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmulwod_w_hu(_1, _2); } + // CHECK-LABEL: @xvmulwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmulwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmulwod_h_bu(_1, _2); } + // CHECK-LABEL: @xvaddwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load 
<8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwev_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvaddwev_d_wu_w(_1, _2); } + // CHECK-LABEL: @xvaddwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvaddwev_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvaddwev_w_hu_h(_1, _2); } + // CHECK-LABEL: @xvaddwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvaddwev_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvaddwev_h_bu_b(_1, _2); } + // CHECK-LABEL: @xvmulwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvmulwev_d_wu_w(_1, _2); } + // CHECK-LABEL: @xvmulwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmulwev_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvmulwev_w_hu_h(_1, _2); } + // 
CHECK-LABEL: @xvmulwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmulwev_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvmulwev_h_bu_b(_1, _2); } + // CHECK-LABEL: @xvaddwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvaddwod_d_wu_w(_1, _2); } + // CHECK-LABEL: @xvaddwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvaddwod_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvaddwod_w_hu_h(_1, _2); } + // CHECK-LABEL: @xvaddwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvaddwod_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvaddwod_h_bu_b(_1, _2); } + // CHECK-LABEL: @xvmulwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> 
[[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvmulwod_d_wu_w(_1, _2); } + // CHECK-LABEL: @xvmulwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmulwod_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvmulwod_w_hu_h(_1, _2); } + // CHECK-LABEL: @xvmulwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmulwod_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvmulwod_h_bu_b(_1, _2); } + // CHECK-LABEL: @xvhaddw_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvhaddw_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvhaddw_q_d(_1, _2); } + // CHECK-LABEL: @xvhaddw_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvhaddw_qu_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvhaddw_qu_du(_1, _2); } + // CHECK-LABEL: @xvhsubw_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, 
!tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvhsubw_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvhsubw_q_d(_1, _2); } + // CHECK-LABEL: @xvhsubw_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvhsubw_qu_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvhsubw_qu_du(_1, _2); } + // CHECK-LABEL: @xvmaddwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwev_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwev_q_d(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwev_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwev_d_w(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmaddwev_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwev_w_h(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmaddwev_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwev_h_b(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmaddwev_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __builtin_lasx_xvmaddwev_q_du(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmaddwev_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __builtin_lasx_xvmaddwev_d_wu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// 
CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvmaddwev_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __builtin_lasx_xvmaddwev_w_hu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvmaddwev_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvmaddwev_h_bu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwod_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwod_q_d(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwod_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwod_d_w(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr 
[[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmaddwod_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwod_w_h(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmaddwod_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwod_h_b(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmaddwod_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __builtin_lasx_xvmaddwod_q_du(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmaddwod_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __builtin_lasx_xvmaddwod_d_wu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// 
CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvmaddwod_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __builtin_lasx_xvmaddwod_w_hu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvmaddwod_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvmaddwod_h_bu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwev_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwev_q_du_d(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwev_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwev_d_wu_w(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: 
[[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmaddwev_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwev_w_hu_h(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmaddwev_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwev_h_bu_b(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwod_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwod_q_du_d(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwod_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwod_d_wu_w(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: 
[[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmaddwod_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwod_w_hu_h(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmaddwod_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwod_h_bu_b(_1, _2, _3); } + // CHECK-LABEL: @xvrotr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvrotr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvrotr_b(_1, _2); } + // CHECK-LABEL: @xvrotr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvrotr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvrotr_h(_1, _2); } + // CHECK-LABEL: @xvrotr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x 
i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvrotr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvrotr_w(_1, _2); } + // CHECK-LABEL: @xvrotr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvrotr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvrotr_d(_1, _2); } + // CHECK-LABEL: @xvadd_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvadd_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadd_q(_1, _2); } + // CHECK-LABEL: @xvsub_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsub_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsub_q(_1, _2); } + // CHECK-LABEL: @xvaddwev_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwev_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvaddwev_q_du_d(_1, _2); } + // CHECK-LABEL: @xvaddwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvaddwod_q_du_d(_1, _2); } + // CHECK-LABEL: @xvmulwev_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvmulwev_q_du_d(_1, _2); } + // CHECK-LABEL: @xvmulwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvmulwod_q_du_d(_1, _2); } + // CHECK-LABEL: @xvmskgez_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> [[_1]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmskgez_b(v32i8 _1) { return __builtin_lasx_xvmskgez_b(_1); } + // CHECK-LABEL: @xvmsknz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> [[_1]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmsknz_b(v32i8 _1) { return __builtin_lasx_xvmsknz_b(_1); } + // CHECK-LABEL: @xvexth_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> [[_1]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvexth_h_b(v32i8 _1) { return 
__builtin_lasx_xvexth_h_b(_1); } + // CHECK-LABEL: @xvexth_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvexth_w_h(v16i16 _1) { return __builtin_lasx_xvexth_w_h(_1); } + // CHECK-LABEL: @xvexth_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvexth_d_w(v8i32 _1) { return __builtin_lasx_xvexth_d_w(_1); } + // CHECK-LABEL: @xvexth_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvexth_q_d(v4i64 _1) { return __builtin_lasx_xvexth_q_d(_1); } + // CHECK-LABEL: @xvexth_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> [[_1]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvexth_hu_bu(v32u8 _1) { return __builtin_lasx_xvexth_hu_bu(_1); } + // CHECK-LABEL: @xvexth_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvexth_wu_hu(v16u16 _1) { return __builtin_lasx_xvexth_wu_hu(_1); } + // CHECK-LABEL: @xvexth_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 
xvexth_du_wu(v8u32 _1) { return __builtin_lasx_xvexth_du_wu(_1); } + // CHECK-LABEL: @xvexth_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvexth_qu_du(v4u64 _1) { return __builtin_lasx_xvexth_qu_du(_1); } + // CHECK-LABEL: @xvrotri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvrotri_b(v32i8 _1) { return __builtin_lasx_xvrotri_b(_1, 1); } + // CHECK-LABEL: @xvrotri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvrotri_h(v16i16 _1) { return __builtin_lasx_xvrotri_h(_1, 1); } + // CHECK-LABEL: @xvrotri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvrotri_w(v8i32 _1) { return __builtin_lasx_xvrotri_w(_1, 1); } + // CHECK-LABEL: @xvrotri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvrotri_d(v4i64 _1) { return __builtin_lasx_xvrotri_d(_1, 1); } + // CHECK-LABEL: @xvextl_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa 
[[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvextl_q_d(v4i64 _1) { return __builtin_lasx_xvextl_q_d(_1); } + // CHECK-LABEL: @xvsrlni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvsrlni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvsrlni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvsrlni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvsrlrni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] 
++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlrni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvsrlrni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlrni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvsrlrni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlrni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvsrlrni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlrni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrlni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> 
[[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrlni_bu_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// 
CHECK-NEXT: ret void + // + v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrlni_hu_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrlni_wu_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrlni_du_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrlrni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load 
<8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrlrni_bu_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrni_hu_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrni_wu_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_du_q( + // CHECK-NEXT: entry: +-// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrni_du_q(_1, _2, 1); } + // CHECK-LABEL: @xvsrani_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrani_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvsrani_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrani_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvsrani_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrani_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvsrani_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> 
[[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrani_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvsrarni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrarni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvsrarni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrarni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvsrarni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrarni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvsrarni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrarni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] 
++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrani_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrani_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrani_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrani_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrani_bu_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_hu_w( + // CHECK-NEXT: entry: +-// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrani_hu_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrani_wu_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrani_du_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrarni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], 
i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrarni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrarni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrarni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrarni_bu_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrarni_hu_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// 
CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrarni_wu_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrarni_du_q(_1, _2, 1); } + // CHECK-LABEL: @xbnz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbnz_b(v32u8 _1) { return __builtin_lasx_xbnz_b(_1); } + // CHECK-LABEL: @xbnz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbnz_d(v4u64 _1) { return __builtin_lasx_xbnz_d(_1); } + // CHECK-LABEL: @xbnz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbnz_h(v16u16 _1) { return __builtin_lasx_xbnz_h(_1); } + // CHECK-LABEL: @xbnz_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbnz_v(v32u8 _1) { return __builtin_lasx_xbnz_v(_1); } + // CHECK-LABEL: @xbnz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + 
int xbnz_w(v8u32 _1) { return __builtin_lasx_xbnz_w(_1); } + // CHECK-LABEL: @xbz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbz_b(v32u8 _1) { return __builtin_lasx_xbz_b(_1); } + // CHECK-LABEL: @xbz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbz_d(v4u64 _1) { return __builtin_lasx_xbz_d(_1); } + // CHECK-LABEL: @xbz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbz_h(v16u16 _1) { return __builtin_lasx_xbz_h(_1); } + // CHECK-LABEL: @xbz_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbz_v(v32u8 _1) { return __builtin_lasx_xbz_v(_1); } + // CHECK-LABEL: @xbz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbz_w(v8u32 _1) { return __builtin_lasx_xbz_w(_1); } + // CHECK-LABEL: @xvfcmp_caf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_caf_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_caf_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_caf_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: 
[[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_caf_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_caf_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_ceq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_ceq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_ceq_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_ceq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_ceq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_ceq_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cle_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cle_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cle_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cle_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cle_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_clt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// 
CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_clt_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_clt_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_clt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_clt_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_clt_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cne_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cne_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cne_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cne_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cor_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cor_d(v4f64 _1, v4f64 _2) { return 
__builtin_lasx_xvfcmp_cor_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cor_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cor_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cor_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cueq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cueq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cueq_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cueq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cueq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cueq_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cule_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cule_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cule_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cule_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cule_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cule_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cult_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cult_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cult_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cult_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cult_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cult_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cun_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cun_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cun_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cune_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cune_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cune_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cune_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret 
<8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cune_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cune_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cun_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cun_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cun_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_saf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_saf_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_saf_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_saf_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_saf_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_saf_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_seq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_seq_d(v4f64 _1, v4f64 _2) { return 
__builtin_lasx_xvfcmp_seq_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_seq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_seq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_seq_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sle_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sle_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sle_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sle_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sle_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_slt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_slt_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_slt_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_slt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_slt_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_slt_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sne_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sne_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sne_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sne_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sor_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sor_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sor_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sor_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sor_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sor_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sueq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// 
CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sueq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sueq_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sueq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sueq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sueq_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sule_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sule_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sule_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sule_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sule_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sule_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sult_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sult_d(v4f64 _1, v4f64 _2) { return 
__builtin_lasx_xvfcmp_sult_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sult_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sult_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sult_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sun_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sun_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sun_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sune_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sune_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sune_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sune_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sune_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sune_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sun_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sun_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sun_s(_1, _2); } + // CHECK-LABEL: @xvpickve_d_f( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvpickve_d_f(v4f64 _1) { return __builtin_lasx_xvpickve_d_f(_1, 1); } + // CHECK-LABEL: @xvpickve_w_f( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvpickve_w_f(v8f32 _1) { return __builtin_lasx_xvpickve_w_f(_1, 1); } + // CHECK-LABEL: @xvrepli_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvrepli_b() { return __builtin_lasx_xvrepli_b(1); } + // CHECK-LABEL: @xvrepli_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvrepli_d() { return __builtin_lasx_xvrepli_d(1); } + // CHECK-LABEL: @xvrepli_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvrepli_h() { return __builtin_lasx_xvrepli_h(1); } + // CHECK-LABEL: @xvrepli_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvrepli_w() { return __builtin_lasx_xvrepli_w(1); } +diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c b/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c +index 331e29fb7d17..7a84e0ae24f9 100644 +--- a/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c ++++ b/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c +@@ -5,4080 +5,5838 @@ + + // CHECK-LABEL: @vsll_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: 
[[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsll_b(v16i8 _1, v16i8 _2) { return __lsx_vsll_b(_1, _2); } + // CHECK-LABEL: @vsll_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsll_h(v8i16 _1, v8i16 _2) { return __lsx_vsll_h(_1, _2); } + // CHECK-LABEL: @vsll_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsll_w(v4i32 _1, v4i32 _2) { return __lsx_vsll_w(_1, _2); } + // CHECK-LABEL: @vsll_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsll_d(v2i64 _1, v2i64 _2) { return __lsx_vsll_d(_1, _2); } + // CHECK-LABEL: @vslli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vslli_b(v16i8 _1) { return __lsx_vslli_b(_1, 1); } + // CHECK-LABEL: @vslli_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vslli_h(v8i16 _1) { return __lsx_vslli_h(_1, 1); } + // CHECK-LABEL: @vslli_w( + // CHECK-NEXT: 
entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vslli_w(v4i32 _1) { return __lsx_vslli_w(_1, 1); } + // CHECK-LABEL: @vslli_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vslli_d(v2i64 _1) { return __lsx_vslli_d(_1, 1); } + // CHECK-LABEL: @vsra_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsra_b(v16i8 _1, v16i8 _2) { return __lsx_vsra_b(_1, _2); } + // CHECK-LABEL: @vsra_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsra_h(v8i16 _1, v8i16 _2) { return __lsx_vsra_h(_1, _2); } + // CHECK-LABEL: @vsra_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsra_w(v4i32 _1, v4i32 _2) { return __lsx_vsra_w(_1, _2); } + // CHECK-LABEL: @vsra_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = 
bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsra_d(v2i64 _1, v2i64 _2) { return __lsx_vsra_d(_1, _2); } + // CHECK-LABEL: @vsrai_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vsrai_b(v16i8 _1) { return __lsx_vsrai_b(_1, 1); } + // CHECK-LABEL: @vsrai_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vsrai_h(v8i16 _1) { return __lsx_vsrai_h(_1, 1); } + // CHECK-LABEL: @vsrai_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vsrai_w(v4i32 _1) { return __lsx_vsrai_w(_1, 1); } + // CHECK-LABEL: @vsrai_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vsrai_d(v2i64 _1) { return __lsx_vsrai_d(_1, 1); } + // CHECK-LABEL: @vsrar_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrar_b(v16i8 _1, v16i8 _2) { return __lsx_vsrar_b(_1, _2); } + // CHECK-LABEL: @vsrar_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] 
to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrar_h(v8i16 _1, v8i16 _2) { return __lsx_vsrar_h(_1, _2); } + // CHECK-LABEL: @vsrar_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrar_w(v4i32 _1, v4i32 _2) { return __lsx_vsrar_w(_1, _2); } + // CHECK-LABEL: @vsrar_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsrar_d(v2i64 _1, v2i64 _2) { return __lsx_vsrar_d(_1, _2); } + // CHECK-LABEL: @vsrari_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vsrari_b(v16i8 _1) { return __lsx_vsrari_b(_1, 1); } + // CHECK-LABEL: @vsrari_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vsrari_h(v8i16 _1) { return __lsx_vsrari_h(_1, 1); } + // CHECK-LABEL: @vsrari_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vsrari_w(v4i32 _1) { return __lsx_vsrari_w(_1, 1); } + // CHECK-LABEL: @vsrari_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> 
[[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vsrari_d(v2i64 _1) { return __lsx_vsrari_d(_1, 1); } + // CHECK-LABEL: @vsrl_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrl_b(v16i8 _1, v16i8 _2) { return __lsx_vsrl_b(_1, _2); } + // CHECK-LABEL: @vsrl_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrl_h(v8i16 _1, v8i16 _2) { return __lsx_vsrl_h(_1, _2); } + // CHECK-LABEL: @vsrl_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrl_w(v4i32 _1, v4i32 _2) { return __lsx_vsrl_w(_1, _2); } + // CHECK-LABEL: @vsrl_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsrl_d(v2i64 _1, v2i64 _2) { return __lsx_vsrl_d(_1, _2); } + // CHECK-LABEL: @vsrli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vsrli_b(v16i8 _1) { return __lsx_vsrli_b(_1, 1); } + // CHECK-LABEL: @vsrli_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 
[[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vsrli_h(v8i16 _1) { return __lsx_vsrli_h(_1, 1); } + // CHECK-LABEL: @vsrli_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vsrli_w(v4i32 _1) { return __lsx_vsrli_w(_1, 1); } + // CHECK-LABEL: @vsrli_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vsrli_d(v2i64 _1) { return __lsx_vsrli_d(_1, 1); } + // CHECK-LABEL: @vsrlr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrlr_b(v16i8 _1, v16i8 _2) { return __lsx_vsrlr_b(_1, _2); } + // CHECK-LABEL: @vsrlr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrlr_h(v8i16 _1, v8i16 _2) { return __lsx_vsrlr_h(_1, _2); } + // CHECK-LABEL: @vsrlr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrlr_w(v4i32 _1, v4i32 _2) { return __lsx_vsrlr_w(_1, _2); } + // CHECK-LABEL: @vsrlr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> [[_1:%.*]], 
<2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsrlr_d(v2i64 _1, v2i64 _2) { return __lsx_vsrlr_d(_1, _2); } + // CHECK-LABEL: @vsrlri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vsrlri_b(v16i8 _1) { return __lsx_vsrlri_b(_1, 1); } + // CHECK-LABEL: @vsrlri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vsrlri_h(v8i16 _1) { return __lsx_vsrlri_h(_1, 1); } + // CHECK-LABEL: @vsrlri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vsrlri_w(v4i32 _1) { return __lsx_vsrlri_w(_1, 1); } + // CHECK-LABEL: @vsrlri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vsrlri_d(v2i64 _1) { return __lsx_vsrlri_d(_1, 1); } + // CHECK-LABEL: @vbitclr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vbitclr_b(v16u8 _1, v16u8 _2) { return __lsx_vbitclr_b(_1, _2); } + // CHECK-LABEL: @vbitclr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> 
[[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vbitclr_h(v8u16 _1, v8u16 _2) { return __lsx_vbitclr_h(_1, _2); } + // CHECK-LABEL: @vbitclr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vbitclr_w(v4u32 _1, v4u32 _2) { return __lsx_vbitclr_w(_1, _2); } + // CHECK-LABEL: @vbitclr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vbitclr_d(v2u64 _1, v2u64 _2) { return __lsx_vbitclr_d(_1, _2); } + // CHECK-LABEL: @vbitclri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vbitclri_b(v16u8 _1) { return __lsx_vbitclri_b(_1, 1); } + // CHECK-LABEL: @vbitclri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8u16 vbitclri_h(v8u16 _1) { return __lsx_vbitclri_h(_1, 1); } + // CHECK-LABEL: @vbitclri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vbitclri_w(v4u32 _1) { return 
__lsx_vbitclri_w(_1, 1); } + // CHECK-LABEL: @vbitclri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vbitclri_d(v2u64 _1) { return __lsx_vbitclri_d(_1, 1); } + // CHECK-LABEL: @vbitset_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vbitset_b(v16u8 _1, v16u8 _2) { return __lsx_vbitset_b(_1, _2); } + // CHECK-LABEL: @vbitset_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vbitset_h(v8u16 _1, v8u16 _2) { return __lsx_vbitset_h(_1, _2); } + // CHECK-LABEL: @vbitset_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vbitset_w(v4u32 _1, v4u32 _2) { return __lsx_vbitset_w(_1, _2); } + // CHECK-LABEL: @vbitset_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vbitset_d(v2u64 _1, v2u64 _2) { return __lsx_vbitset_d(_1, _2); } + // CHECK-LABEL: @vbitseti_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 
[[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vbitseti_b(v16u8 _1) { return __lsx_vbitseti_b(_1, 1); } + // CHECK-LABEL: @vbitseti_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8u16 vbitseti_h(v8u16 _1) { return __lsx_vbitseti_h(_1, 1); } + // CHECK-LABEL: @vbitseti_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vbitseti_w(v4u32 _1) { return __lsx_vbitseti_w(_1, 1); } + // CHECK-LABEL: @vbitseti_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vbitseti_d(v2u64 _1) { return __lsx_vbitseti_d(_1, 1); } + // CHECK-LABEL: @vbitrev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vbitrev_b(v16u8 _1, v16u8 _2) { return __lsx_vbitrev_b(_1, _2); } + // CHECK-LABEL: @vbitrev_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vbitrev_h(v8u16 _1, v8u16 _2) { return __lsx_vbitrev_h(_1, _2); } + // CHECK-LABEL: @vbitrev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: 
ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vbitrev_w(v4u32 _1, v4u32 _2) { return __lsx_vbitrev_w(_1, _2); } + // CHECK-LABEL: @vbitrev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vbitrev_d(v2u64 _1, v2u64 _2) { return __lsx_vbitrev_d(_1, _2); } + // CHECK-LABEL: @vbitrevi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vbitrevi_b(v16u8 _1) { return __lsx_vbitrevi_b(_1, 1); } + // CHECK-LABEL: @vbitrevi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8u16 vbitrevi_h(v8u16 _1) { return __lsx_vbitrevi_h(_1, 1); } + // CHECK-LABEL: @vbitrevi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vbitrevi_w(v4u32 _1) { return __lsx_vbitrevi_w(_1, 1); } + // CHECK-LABEL: @vbitrevi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vbitrevi_d(v2u64 _1) { return __lsx_vbitrevi_d(_1, 1); } + // CHECK-LABEL: @vadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> 
[[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vadd_b(v16i8 _1, v16i8 _2) { return __lsx_vadd_b(_1, _2); } + // CHECK-LABEL: @vadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vadd_h(v8i16 _1, v8i16 _2) { return __lsx_vadd_h(_1, _2); } + // CHECK-LABEL: @vadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vadd_w(v4i32 _1, v4i32 _2) { return __lsx_vadd_w(_1, _2); } + // CHECK-LABEL: @vadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vadd_d(v2i64 _1, v2i64 _2) { return __lsx_vadd_d(_1, _2); } + // CHECK-LABEL: @vaddi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vaddi_bu(v16i8 _1) { return __lsx_vaddi_bu(_1, 1); } + // CHECK-LABEL: @vaddi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + 
v8i16 vaddi_hu(v8i16 _1) { return __lsx_vaddi_hu(_1, 1); } + // CHECK-LABEL: @vaddi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vaddi_wu(v4i32 _1) { return __lsx_vaddi_wu(_1, 1); } + // CHECK-LABEL: @vaddi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vaddi_du(v2i64 _1) { return __lsx_vaddi_du(_1, 1); } + // CHECK-LABEL: @vsub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsub_b(v16i8 _1, v16i8 _2) { return __lsx_vsub_b(_1, _2); } + // CHECK-LABEL: @vsub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsub_h(v8i16 _1, v8i16 _2) { return __lsx_vsub_h(_1, _2); } + // CHECK-LABEL: @vsub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsub_w(v4i32 _1, v4i32 _2) { return __lsx_vsub_w(_1, _2); } + // CHECK-LABEL: @vsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail 
call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsub_d(v2i64 _1, v2i64 _2) { return __lsx_vsub_d(_1, _2); } + // CHECK-LABEL: @vsubi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vsubi_bu(v16i8 _1) { return __lsx_vsubi_bu(_1, 1); } + // CHECK-LABEL: @vsubi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vsubi_hu(v8i16 _1) { return __lsx_vsubi_hu(_1, 1); } + // CHECK-LABEL: @vsubi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vsubi_wu(v4i32 _1) { return __lsx_vsubi_wu(_1, 1); } + // CHECK-LABEL: @vsubi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vsubi_du(v2i64 _1) { return __lsx_vsubi_du(_1, 1); } + // CHECK-LABEL: @vmax_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vmax_b(v16i8 _1, v16i8 _2) { return __lsx_vmax_b(_1, _2); } + // CHECK-LABEL: @vmax_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> 
@llvm.loongarch.lsx.vmax.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmax_h(v8i16 _1, v8i16 _2) { return __lsx_vmax_h(_1, _2); } + // CHECK-LABEL: @vmax_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmax_w(v4i32 _1, v4i32 _2) { return __lsx_vmax_w(_1, _2); } + // CHECK-LABEL: @vmax_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmax_d(v2i64 _1, v2i64 _2) { return __lsx_vmax_d(_1, _2); } + // CHECK-LABEL: @vmaxi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vmaxi_b(v16i8 _1) { return __lsx_vmaxi_b(_1, 1); } + // CHECK-LABEL: @vmaxi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vmaxi_h(v8i16 _1) { return __lsx_vmaxi_h(_1, 1); } + // CHECK-LABEL: @vmaxi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vmaxi_w(v4i32 _1) { return __lsx_vmaxi_w(_1, 1); } + // CHECK-LABEL: @vmaxi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> 
@llvm.loongarch.lsx.vmaxi.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vmaxi_d(v2i64 _1) { return __lsx_vmaxi_d(_1, 1); } + // CHECK-LABEL: @vmax_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vmax_bu(v16u8 _1, v16u8 _2) { return __lsx_vmax_bu(_1, _2); } + // CHECK-LABEL: @vmax_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vmax_hu(v8u16 _1, v8u16 _2) { return __lsx_vmax_hu(_1, _2); } + // CHECK-LABEL: @vmax_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vmax_wu(v4u32 _1, v4u32 _2) { return __lsx_vmax_wu(_1, _2); } + // CHECK-LABEL: @vmax_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vmax_du(v2u64 _1, v2u64 _2) { return __lsx_vmax_du(_1, _2); } + // CHECK-LABEL: @vmaxi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vmaxi_bu(v16u8 _1) { return __lsx_vmaxi_bu(_1, 1); } + // CHECK-LABEL: @vmaxi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> 
@llvm.loongarch.lsx.vmaxi.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8u16 vmaxi_hu(v8u16 _1) { return __lsx_vmaxi_hu(_1, 1); } + // CHECK-LABEL: @vmaxi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vmaxi_wu(v4u32 _1) { return __lsx_vmaxi_wu(_1, 1); } + // CHECK-LABEL: @vmaxi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vmaxi_du(v2u64 _1) { return __lsx_vmaxi_du(_1, 1); } + // CHECK-LABEL: @vmin_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vmin_b(v16i8 _1, v16i8 _2) { return __lsx_vmin_b(_1, _2); } + // CHECK-LABEL: @vmin_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmin_h(v8i16 _1, v8i16 _2) { return __lsx_vmin_h(_1, _2); } + // CHECK-LABEL: @vmin_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmin_w(v4i32 _1, v4i32 _2) { return __lsx_vmin_w(_1, _2); } + // 
CHECK-LABEL: @vmin_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmin_d(v2i64 _1, v2i64 _2) { return __lsx_vmin_d(_1, _2); } + // CHECK-LABEL: @vmini_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vmini_b(v16i8 _1) { return __lsx_vmini_b(_1, 1); } + // CHECK-LABEL: @vmini_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vmini_h(v8i16 _1) { return __lsx_vmini_h(_1, 1); } + // CHECK-LABEL: @vmini_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vmini_w(v4i32 _1) { return __lsx_vmini_w(_1, 1); } + // CHECK-LABEL: @vmini_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vmini_d(v2i64 _1) { return __lsx_vmini_d(_1, 1); } + // CHECK-LABEL: @vmin_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vmin_bu(v16u8 _1, v16u8 _2) { return __lsx_vmin_bu(_1, _2); } + // CHECK-LABEL: @vmin_hu( + // 
CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vmin_hu(v8u16 _1, v8u16 _2) { return __lsx_vmin_hu(_1, _2); } + // CHECK-LABEL: @vmin_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vmin_wu(v4u32 _1, v4u32 _2) { return __lsx_vmin_wu(_1, _2); } + // CHECK-LABEL: @vmin_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vmin_du(v2u64 _1, v2u64 _2) { return __lsx_vmin_du(_1, _2); } + // CHECK-LABEL: @vmini_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vmini_bu(v16u8 _1) { return __lsx_vmini_bu(_1, 1); } + // CHECK-LABEL: @vmini_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8u16 vmini_hu(v8u16 _1) { return __lsx_vmini_hu(_1, 1); } + // CHECK-LABEL: @vmini_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 
[[TMP2]] + // + v4u32 vmini_wu(v4u32 _1) { return __lsx_vmini_wu(_1, 1); } + // CHECK-LABEL: @vmini_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vmini_du(v2u64 _1) { return __lsx_vmini_du(_1, 1); } + // CHECK-LABEL: @vseq_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vseq_b(v16i8 _1, v16i8 _2) { return __lsx_vseq_b(_1, _2); } + // CHECK-LABEL: @vseq_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vseq_h(v8i16 _1, v8i16 _2) { return __lsx_vseq_h(_1, _2); } + // CHECK-LABEL: @vseq_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vseq_w(v4i32 _1, v4i32 _2) { return __lsx_vseq_w(_1, _2); } + // CHECK-LABEL: @vseq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vseq_d(v2i64 _1, v2i64 _2) { return __lsx_vseq_d(_1, _2); } + // CHECK-LABEL: @vseqi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// 
CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vseqi_b(v16i8 _1) { return __lsx_vseqi_b(_1, 1); } + // CHECK-LABEL: @vseqi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vseqi_h(v8i16 _1) { return __lsx_vseqi_h(_1, 1); } + // CHECK-LABEL: @vseqi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vseqi_w(v4i32 _1) { return __lsx_vseqi_w(_1, 1); } + // CHECK-LABEL: @vseqi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vseqi_d(v2i64 _1) { return __lsx_vseqi_d(_1, 1); } + // CHECK-LABEL: @vslti_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vslti_b(v16i8 _1) { return __lsx_vslti_b(_1, 1); } + // CHECK-LABEL: @vslt_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vslt_b(v16i8 _1, v16i8 _2) { return __lsx_vslt_b(_1, _2); } + // CHECK-LABEL: @vslt_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> 
@llvm.loongarch.lsx.vslt.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vslt_h(v8i16 _1, v8i16 _2) { return __lsx_vslt_h(_1, _2); } + // CHECK-LABEL: @vslt_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vslt_w(v4i32 _1, v4i32 _2) { return __lsx_vslt_w(_1, _2); } + // CHECK-LABEL: @vslt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vslt_d(v2i64 _1, v2i64 _2) { return __lsx_vslt_d(_1, _2); } + // CHECK-LABEL: @vslti_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vslti_h(v8i16 _1) { return __lsx_vslti_h(_1, 1); } + // CHECK-LABEL: @vslti_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vslti_w(v4i32 _1) { return __lsx_vslti_w(_1, 1); } + // CHECK-LABEL: @vslti_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vslti_d(v2i64 _1) { return __lsx_vslti_d(_1, 1); } + // CHECK-LABEL: @vslt_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 
[[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vslt_bu(v16u8 _1, v16u8 _2) { return __lsx_vslt_bu(_1, _2); } + // CHECK-LABEL: @vslt_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vslt_hu(v8u16 _1, v8u16 _2) { return __lsx_vslt_hu(_1, _2); } + // CHECK-LABEL: @vslt_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vslt_wu(v4u32 _1, v4u32 _2) { return __lsx_vslt_wu(_1, _2); } + // CHECK-LABEL: @vslt_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vslt_du(v2u64 _1, v2u64 _2) { return __lsx_vslt_du(_1, _2); } + // CHECK-LABEL: @vslti_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vslti_bu(v16u8 _1) { return __lsx_vslti_bu(_1, 1); } + // CHECK-LABEL: @vslti_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vslti_hu(v8u16 _1) { return __lsx_vslti_hu(_1, 1); } + // CHECK-LABEL: @vslti_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> 
@llvm.loongarch.lsx.vslti.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vslti_wu(v4u32 _1) { return __lsx_vslti_wu(_1, 1); } + // CHECK-LABEL: @vslti_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vslti_du(v2u64 _1) { return __lsx_vslti_du(_1, 1); } + // CHECK-LABEL: @vsle_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsle_b(v16i8 _1, v16i8 _2) { return __lsx_vsle_b(_1, _2); } + // CHECK-LABEL: @vsle_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsle_h(v8i16 _1, v8i16 _2) { return __lsx_vsle_h(_1, _2); } + // CHECK-LABEL: @vsle_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsle_w(v4i32 _1, v4i32 _2) { return __lsx_vsle_w(_1, _2); } + // CHECK-LABEL: @vsle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: 
ret i128 [[TMP3]] + // + v2i64 vsle_d(v2i64 _1, v2i64 _2) { return __lsx_vsle_d(_1, _2); } + // CHECK-LABEL: @vslei_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vslei_b(v16i8 _1) { return __lsx_vslei_b(_1, 1); } + // CHECK-LABEL: @vslei_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vslei_h(v8i16 _1) { return __lsx_vslei_h(_1, 1); } + // CHECK-LABEL: @vslei_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vslei_w(v4i32 _1) { return __lsx_vslei_w(_1, 1); } + // CHECK-LABEL: @vslei_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vslei_d(v2i64 _1) { return __lsx_vslei_d(_1, 1); } + // CHECK-LABEL: @vsle_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsle_bu(v16u8 _1, v16u8 _2) { return __lsx_vsle_bu(_1, _2); } + // CHECK-LABEL: @vsle_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + 
v8i16 vsle_hu(v8u16 _1, v8u16 _2) { return __lsx_vsle_hu(_1, _2); } + // CHECK-LABEL: @vsle_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsle_wu(v4u32 _1, v4u32 _2) { return __lsx_vsle_wu(_1, _2); } + // CHECK-LABEL: @vsle_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsle_du(v2u64 _1, v2u64 _2) { return __lsx_vsle_du(_1, _2); } + // CHECK-LABEL: @vslei_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vslei_bu(v16u8 _1) { return __lsx_vslei_bu(_1, 1); } + // CHECK-LABEL: @vslei_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vslei_hu(v8u16 _1) { return __lsx_vslei_hu(_1, 1); } + // CHECK-LABEL: @vslei_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vslei_wu(v4u32 _1) { return __lsx_vslei_wu(_1, 1); } + // CHECK-LABEL: @vslei_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + 
// + v2i64 vslei_du(v2u64 _1) { return __lsx_vslei_du(_1, 1); } + // CHECK-LABEL: @vsat_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vsat_b(v16i8 _1) { return __lsx_vsat_b(_1, 1); } + // CHECK-LABEL: @vsat_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vsat_h(v8i16 _1) { return __lsx_vsat_h(_1, 1); } + // CHECK-LABEL: @vsat_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vsat_w(v4i32 _1) { return __lsx_vsat_w(_1, 1); } + // CHECK-LABEL: @vsat_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vsat_d(v2i64 _1) { return __lsx_vsat_d(_1, 1); } + // CHECK-LABEL: @vsat_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vsat_bu(v16u8 _1) { return __lsx_vsat_bu(_1, 1); } + // CHECK-LABEL: @vsat_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8u16 vsat_hu(v8u16 _1) { return __lsx_vsat_hu(_1, 1); } + // CHECK-LABEL: @vsat_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// 
CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vsat_wu(v4u32 _1) { return __lsx_vsat_wu(_1, 1); } + // CHECK-LABEL: @vsat_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vsat_du(v2u64 _1) { return __lsx_vsat_du(_1, 1); } + // CHECK-LABEL: @vadda_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vadda_b(v16i8 _1, v16i8 _2) { return __lsx_vadda_b(_1, _2); } + // CHECK-LABEL: @vadda_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vadda_h(v8i16 _1, v8i16 _2) { return __lsx_vadda_h(_1, _2); } + // CHECK-LABEL: @vadda_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vadda_w(v4i32 _1, v4i32 _2) { return __lsx_vadda_w(_1, _2); } + // CHECK-LABEL: @vadda_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vadda_d(v2i64 _1, v2i64 _2) { return __lsx_vadda_d(_1, _2); } 
+ // CHECK-LABEL: @vsadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsadd_b(v16i8 _1, v16i8 _2) { return __lsx_vsadd_b(_1, _2); } + // CHECK-LABEL: @vsadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsadd_h(v8i16 _1, v8i16 _2) { return __lsx_vsadd_h(_1, _2); } + // CHECK-LABEL: @vsadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsadd_w(v4i32 _1, v4i32 _2) { return __lsx_vsadd_w(_1, _2); } + // CHECK-LABEL: @vsadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsadd_d(v2i64 _1, v2i64 _2) { return __lsx_vsadd_d(_1, _2); } + // CHECK-LABEL: @vsadd_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vsadd_bu(v16u8 _1, v16u8 _2) { return __lsx_vsadd_bu(_1, _2); } + // CHECK-LABEL: @vsadd_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// 
CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vsadd_hu(v8u16 _1, v8u16 _2) { return __lsx_vsadd_hu(_1, _2); } + // CHECK-LABEL: @vsadd_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vsadd_wu(v4u32 _1, v4u32 _2) { return __lsx_vsadd_wu(_1, _2); } + // CHECK-LABEL: @vsadd_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vsadd_du(v2u64 _1, v2u64 _2) { return __lsx_vsadd_du(_1, _2); } + // CHECK-LABEL: @vavg_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vavg_b(v16i8 _1, v16i8 _2) { return __lsx_vavg_b(_1, _2); } + // CHECK-LABEL: @vavg_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vavg_h(v8i16 _1, v8i16 _2) { return __lsx_vavg_h(_1, _2); } + // CHECK-LABEL: @vavg_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 
x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vavg_w(v4i32 _1, v4i32 _2) { return __lsx_vavg_w(_1, _2); } + // CHECK-LABEL: @vavg_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vavg_d(v2i64 _1, v2i64 _2) { return __lsx_vavg_d(_1, _2); } + // CHECK-LABEL: @vavg_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vavg_bu(v16u8 _1, v16u8 _2) { return __lsx_vavg_bu(_1, _2); } + // CHECK-LABEL: @vavg_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vavg_hu(v8u16 _1, v8u16 _2) { return __lsx_vavg_hu(_1, _2); } + // CHECK-LABEL: @vavg_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vavg_wu(v4u32 _1, v4u32 _2) { return __lsx_vavg_wu(_1, _2); } + // CHECK-LABEL: @vavg_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vavg_du(v2u64 _1, v2u64 _2) { return __lsx_vavg_du(_1, _2); } + // CHECK-LABEL: 
@vavgr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vavgr_b(v16i8 _1, v16i8 _2) { return __lsx_vavgr_b(_1, _2); } + // CHECK-LABEL: @vavgr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vavgr_h(v8i16 _1, v8i16 _2) { return __lsx_vavgr_h(_1, _2); } + // CHECK-LABEL: @vavgr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vavgr_w(v4i32 _1, v4i32 _2) { return __lsx_vavgr_w(_1, _2); } + // CHECK-LABEL: @vavgr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vavgr_d(v2i64 _1, v2i64 _2) { return __lsx_vavgr_d(_1, _2); } + // CHECK-LABEL: @vavgr_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vavgr_bu(v16u8 _1, v16u8 _2) { return __lsx_vavgr_bu(_1, _2); } + // CHECK-LABEL: @vavgr_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: 
[[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vavgr_hu(v8u16 _1, v8u16 _2) { return __lsx_vavgr_hu(_1, _2); } + // CHECK-LABEL: @vavgr_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vavgr_wu(v4u32 _1, v4u32 _2) { return __lsx_vavgr_wu(_1, _2); } + // CHECK-LABEL: @vavgr_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vavgr_du(v2u64 _1, v2u64 _2) { return __lsx_vavgr_du(_1, _2); } + // CHECK-LABEL: @vssub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssub_b(v16i8 _1, v16i8 _2) { return __lsx_vssub_b(_1, _2); } + // CHECK-LABEL: @vssub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssub_h(v8i16 _1, v8i16 _2) { return __lsx_vssub_h(_1, _2); } + // CHECK-LABEL: @vssub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> 
@llvm.loongarch.lsx.vssub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssub_w(v4i32 _1, v4i32 _2) { return __lsx_vssub_w(_1, _2); } + // CHECK-LABEL: @vssub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vssub_d(v2i64 _1, v2i64 _2) { return __lsx_vssub_d(_1, _2); } + // CHECK-LABEL: @vssub_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssub_bu(v16u8 _1, v16u8 _2) { return __lsx_vssub_bu(_1, _2); } + // CHECK-LABEL: @vssub_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssub_hu(v8u16 _1, v8u16 _2) { return __lsx_vssub_hu(_1, _2); } + // CHECK-LABEL: @vssub_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssub_wu(v4u32 _1, v4u32 _2) { return __lsx_vssub_wu(_1, _2); } + // CHECK-LABEL: @vssub_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vssub_du(v2u64 _1, v2u64 _2) 
{ return __lsx_vssub_du(_1, _2); } + // CHECK-LABEL: @vabsd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vabsd_b(v16i8 _1, v16i8 _2) { return __lsx_vabsd_b(_1, _2); } + // CHECK-LABEL: @vabsd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vabsd_h(v8i16 _1, v8i16 _2) { return __lsx_vabsd_h(_1, _2); } + // CHECK-LABEL: @vabsd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vabsd_w(v4i32 _1, v4i32 _2) { return __lsx_vabsd_w(_1, _2); } + // CHECK-LABEL: @vabsd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vabsd_d(v2i64 _1, v2i64 _2) { return __lsx_vabsd_d(_1, _2); } + // CHECK-LABEL: @vabsd_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vabsd_bu(v16u8 _1, v16u8 _2) { return __lsx_vabsd_bu(_1, _2); } + // CHECK-LABEL: @vabsd_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// 
CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vabsd_hu(v8u16 _1, v8u16 _2) { return __lsx_vabsd_hu(_1, _2); } + // CHECK-LABEL: @vabsd_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vabsd_wu(v4u32 _1, v4u32 _2) { return __lsx_vabsd_wu(_1, _2); } + // CHECK-LABEL: @vabsd_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vabsd_du(v2u64 _1, v2u64 _2) { return __lsx_vabsd_du(_1, _2); } + // CHECK-LABEL: @vmul_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vmul_b(v16i8 _1, v16i8 _2) { return __lsx_vmul_b(_1, _2); } + // CHECK-LABEL: @vmul_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmul_h(v8i16 _1, v8i16 _2) { return __lsx_vmul_h(_1, _2); } + // CHECK-LABEL: @vmul_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call 
<4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmul_w(v4i32 _1, v4i32 _2) { return __lsx_vmul_w(_1, _2); } + // CHECK-LABEL: @vmul_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmul_d(v2i64 _1, v2i64 _2) { return __lsx_vmul_d(_1, _2); } + // CHECK-LABEL: @vmadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v16i8 vmadd_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __lsx_vmadd_b(_1, _2, _3); + } + // CHECK-LABEL: @vmadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8i16 vmadd_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __lsx_vmadd_h(_1, _2, _3); + } + // CHECK-LABEL: @vmadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4i32 vmadd_w(v4i32 _1, v4i32 _2, v4i32 _3) { + return __lsx_vmadd_w(_1, _2, _3); + } + // CHECK-LABEL: @vmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] 
++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmadd_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __lsx_vmadd_d(_1, _2, _3); + } + // CHECK-LABEL: @vmsub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v16i8 vmsub_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __lsx_vmsub_b(_1, _2, _3); + } + // CHECK-LABEL: @vmsub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8i16 vmsub_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __lsx_vmsub_h(_1, _2, _3); + } + // CHECK-LABEL: @vmsub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4i32 vmsub_w(v4i32 _1, v4i32 _2, v4i32 _3) { + return __lsx_vmsub_w(_1, _2, _3); + } + // CHECK-LABEL: @vmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 
x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmsub_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __lsx_vmsub_d(_1, _2, _3); + } + // CHECK-LABEL: @vdiv_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vdiv_b(v16i8 _1, v16i8 _2) { return __lsx_vdiv_b(_1, _2); } + // CHECK-LABEL: @vdiv_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vdiv_h(v8i16 _1, v8i16 _2) { return __lsx_vdiv_h(_1, _2); } + // CHECK-LABEL: @vdiv_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vdiv_w(v4i32 _1, v4i32 _2) { return __lsx_vdiv_w(_1, _2); } + // CHECK-LABEL: @vdiv_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vdiv_d(v2i64 _1, v2i64 _2) { return __lsx_vdiv_d(_1, _2); } + // CHECK-LABEL: @vdiv_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vdiv_bu(v16u8 _1, v16u8 _2) { return __lsx_vdiv_bu(_1, _2); } + // CHECK-LABEL: @vdiv_hu( + // 
CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vdiv_hu(v8u16 _1, v8u16 _2) { return __lsx_vdiv_hu(_1, _2); } + // CHECK-LABEL: @vdiv_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vdiv_wu(v4u32 _1, v4u32 _2) { return __lsx_vdiv_wu(_1, _2); } + // CHECK-LABEL: @vdiv_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vdiv_du(v2u64 _1, v2u64 _2) { return __lsx_vdiv_du(_1, _2); } + // CHECK-LABEL: @vhaddw_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vhaddw_h_b(v16i8 _1, v16i8 _2) { return __lsx_vhaddw_h_b(_1, _2); } + // CHECK-LABEL: @vhaddw_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vhaddw_w_h(v8i16 _1, v8i16 _2) { return __lsx_vhaddw_w_h(_1, _2); } + // CHECK-LABEL: @vhaddw_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// 
CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vhaddw_d_w(v4i32 _1, v4i32 _2) { return __lsx_vhaddw_d_w(_1, _2); } + // CHECK-LABEL: @vhaddw_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vhaddw_hu_bu(v16u8 _1, v16u8 _2) { return __lsx_vhaddw_hu_bu(_1, _2); } + // CHECK-LABEL: @vhaddw_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vhaddw_wu_hu(v8u16 _1, v8u16 _2) { return __lsx_vhaddw_wu_hu(_1, _2); } + // CHECK-LABEL: @vhaddw_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vhaddw_du_wu(v4u32 _1, v4u32 _2) { return __lsx_vhaddw_du_wu(_1, _2); } + // CHECK-LABEL: @vhsubw_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vhsubw_h_b(v16i8 _1, v16i8 _2) { return __lsx_vhsubw_h_b(_1, _2); } + // CHECK-LABEL: @vhsubw_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 
[[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vhsubw_w_h(v8i16 _1, v8i16 _2) { return __lsx_vhsubw_w_h(_1, _2); } + // CHECK-LABEL: @vhsubw_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vhsubw_d_w(v4i32 _1, v4i32 _2) { return __lsx_vhsubw_d_w(_1, _2); } + // CHECK-LABEL: @vhsubw_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vhsubw_hu_bu(v16u8 _1, v16u8 _2) { return __lsx_vhsubw_hu_bu(_1, _2); } + // CHECK-LABEL: @vhsubw_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vhsubw_wu_hu(v8u16 _1, v8u16 _2) { return __lsx_vhsubw_wu_hu(_1, _2); } + // CHECK-LABEL: @vhsubw_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vhsubw_du_wu(v4u32 _1, v4u32 _2) { return __lsx_vhsubw_du_wu(_1, _2); } + // CHECK-LABEL: @vmod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> [[TMP0]], 
<16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vmod_b(v16i8 _1, v16i8 _2) { return __lsx_vmod_b(_1, _2); } + // CHECK-LABEL: @vmod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmod_h(v8i16 _1, v8i16 _2) { return __lsx_vmod_h(_1, _2); } + // CHECK-LABEL: @vmod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmod_w(v4i32 _1, v4i32 _2) { return __lsx_vmod_w(_1, _2); } + // CHECK-LABEL: @vmod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmod_d(v2i64 _1, v2i64 _2) { return __lsx_vmod_d(_1, _2); } + // CHECK-LABEL: @vmod_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vmod_bu(v16u8 _1, v16u8 _2) { return __lsx_vmod_bu(_1, _2); } + // CHECK-LABEL: @vmod_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vmod_hu(v8u16 _1, v8u16 _2) { return __lsx_vmod_hu(_1, _2); } + // CHECK-LABEL: @vmod_wu( + // CHECK-NEXT: 
entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vmod_wu(v4u32 _1, v4u32 _2) { return __lsx_vmod_wu(_1, _2); } + // CHECK-LABEL: @vmod_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vmod_du(v2u64 _1, v2u64 _2) { return __lsx_vmod_du(_1, _2); } + // CHECK-LABEL: @vreplve_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> [[_1:%.*]], i32 [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> [[TMP0]], i32 [[_2:%.*]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vreplve_b(v16i8 _1, int _2) { return __lsx_vreplve_b(_1, _2); } + // CHECK-LABEL: @vreplve_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> [[_1:%.*]], i32 [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> [[TMP0]], i32 [[_2:%.*]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vreplve_h(v8i16 _1, int _2) { return __lsx_vreplve_h(_1, _2); } + // CHECK-LABEL: @vreplve_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> [[_1:%.*]], i32 [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> [[TMP0]], i32 [[_2:%.*]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vreplve_w(v4i32 _1, int _2) { return __lsx_vreplve_w(_1, _2); } + // CHECK-LABEL: @vreplve_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> [[_1:%.*]], i32 [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> [[TMP0]], i32 [[_2:%.*]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 
[[TMP2]] + // + v2i64 vreplve_d(v2i64 _1, int _2) { return __lsx_vreplve_d(_1, _2); } + // CHECK-LABEL: @vreplvei_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vreplvei_b(v16i8 _1) { return __lsx_vreplvei_b(_1, 1); } + // CHECK-LABEL: @vreplvei_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vreplvei_h(v8i16 _1) { return __lsx_vreplvei_h(_1, 1); } + // CHECK-LABEL: @vreplvei_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vreplvei_w(v4i32 _1) { return __lsx_vreplvei_w(_1, 1); } + // CHECK-LABEL: @vreplvei_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vreplvei_d(v2i64 _1) { return __lsx_vreplvei_d(_1, 1); } + // CHECK-LABEL: @vpickev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vpickev_b(v16i8 _1, v16i8 _2) { return __lsx_vpickev_b(_1, _2); } + // CHECK-LABEL: @vpickev_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast 
<8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vpickev_h(v8i16 _1, v8i16 _2) { return __lsx_vpickev_h(_1, _2); } + // CHECK-LABEL: @vpickev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vpickev_w(v4i32 _1, v4i32 _2) { return __lsx_vpickev_w(_1, _2); } + // CHECK-LABEL: @vpickev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vpickev_d(v2i64 _1, v2i64 _2) { return __lsx_vpickev_d(_1, _2); } + // CHECK-LABEL: @vpickod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vpickod_b(v16i8 _1, v16i8 _2) { return __lsx_vpickod_b(_1, _2); } + // CHECK-LABEL: @vpickod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vpickod_h(v8i16 _1, v8i16 _2) { return __lsx_vpickod_h(_1, _2); } + // CHECK-LABEL: @vpickod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vpickod_w(v4i32 _1, v4i32 _2) { return __lsx_vpickod_w(_1, _2); } + // CHECK-LABEL: @vpickod_d( + // 
CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vpickod_d(v2i64 _1, v2i64 _2) { return __lsx_vpickod_d(_1, _2); } + // CHECK-LABEL: @vilvh_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vilvh_b(v16i8 _1, v16i8 _2) { return __lsx_vilvh_b(_1, _2); } + // CHECK-LABEL: @vilvh_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vilvh_h(v8i16 _1, v8i16 _2) { return __lsx_vilvh_h(_1, _2); } + // CHECK-LABEL: @vilvh_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vilvh_w(v4i32 _1, v4i32 _2) { return __lsx_vilvh_w(_1, _2); } + // CHECK-LABEL: @vilvh_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vilvh_d(v2i64 _1, v2i64 _2) { return __lsx_vilvh_d(_1, _2); } + // CHECK-LABEL: @vilvl_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = 
bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vilvl_b(v16i8 _1, v16i8 _2) { return __lsx_vilvl_b(_1, _2); } + // CHECK-LABEL: @vilvl_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vilvl_h(v8i16 _1, v8i16 _2) { return __lsx_vilvl_h(_1, _2); } + // CHECK-LABEL: @vilvl_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vilvl_w(v4i32 _1, v4i32 _2) { return __lsx_vilvl_w(_1, _2); } + // CHECK-LABEL: @vilvl_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vilvl_d(v2i64 _1, v2i64 _2) { return __lsx_vilvl_d(_1, _2); } + // CHECK-LABEL: @vpackev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vpackev_b(v16i8 _1, v16i8 _2) { return __lsx_vpackev_b(_1, _2); } + // CHECK-LABEL: @vpackev_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> 
[[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vpackev_h(v8i16 _1, v8i16 _2) { return __lsx_vpackev_h(_1, _2); } + // CHECK-LABEL: @vpackev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vpackev_w(v4i32 _1, v4i32 _2) { return __lsx_vpackev_w(_1, _2); } + // CHECK-LABEL: @vpackev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vpackev_d(v2i64 _1, v2i64 _2) { return __lsx_vpackev_d(_1, _2); } + // CHECK-LABEL: @vpackod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vpackod_b(v16i8 _1, v16i8 _2) { return __lsx_vpackod_b(_1, _2); } + // CHECK-LABEL: @vpackod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vpackod_h(v8i16 _1, v8i16 _2) { return __lsx_vpackod_h(_1, _2); } + // CHECK-LABEL: @vpackod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vpackod_w(v4i32 _1, v4i32 _2) { 
return __lsx_vpackod_w(_1, _2); } + // CHECK-LABEL: @vpackod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vpackod_d(v2i64 _1, v2i64 _2) { return __lsx_vpackod_d(_1, _2); } + // CHECK-LABEL: @vshuf_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8i16 vshuf_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __lsx_vshuf_h(_1, _2, _3); + } + // CHECK-LABEL: @vshuf_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4i32 vshuf_w(v4i32 _1, v4i32 _2, v4i32 _3) { + return __lsx_vshuf_w(_1, _2, _3); + } + // CHECK-LABEL: @vshuf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vshuf_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __lsx_vshuf_d(_1, _2, _3); + } + // CHECK-LABEL: @vand_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> 
@llvm.loongarch.lsx.vand.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vand_v(v16u8 _1, v16u8 _2) { return __lsx_vand_v(_1, _2); } + // CHECK-LABEL: @vandi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vandi_b(v16u8 _1) { return __lsx_vandi_b(_1, 1); } + // CHECK-LABEL: @vor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vor_v(v16u8 _1, v16u8 _2) { return __lsx_vor_v(_1, _2); } + // CHECK-LABEL: @vori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vori_b(v16u8 _1) { return __lsx_vori_b(_1, 1); } + // CHECK-LABEL: @vnor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vnor_v(v16u8 _1, v16u8 _2) { return __lsx_vnor_v(_1, _2); } + // CHECK-LABEL: @vnori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vnori_b(v16u8 _1) { return __lsx_vnori_b(_1, 1); } + // CHECK-LABEL: @vxor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] 
to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vxor_v(v16u8 _1, v16u8 _2) { return __lsx_vxor_v(_1, _2); } + // CHECK-LABEL: @vxori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vxori_b(v16u8 _1) { return __lsx_vxori_b(_1, 1); } + // CHECK-LABEL: @vbitsel_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v16u8 vbitsel_v(v16u8 _1, v16u8 _2, v16u8 _3) { + return __lsx_vbitsel_v(_1, _2, _3); + } + // CHECK-LABEL: @vbitseli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vbitseli_b(v16u8 _1, v16u8 _2) { return __lsx_vbitseli_b(_1, _2, 1); } + // CHECK-LABEL: @vshuf4i_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vshuf4i_b(v16i8 _1) { return __lsx_vshuf4i_b(_1, 1); } + // CHECK-LABEL: @vshuf4i_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vshuf4i_h(v8i16 _1) { return __lsx_vshuf4i_h(_1, 1); } + // CHECK-LABEL: @vshuf4i_w( + // CHECK-NEXT: entry: +-// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vshuf4i_w(v4i32 _1) { return __lsx_vshuf4i_w(_1, 1); } + // CHECK-LABEL: @vreplgr2vr_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v16i8 vreplgr2vr_b(int _1) { return __lsx_vreplgr2vr_b(_1); } + // CHECK-LABEL: @vreplgr2vr_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v8i16 vreplgr2vr_h(int _1) { return __lsx_vreplgr2vr_h(_1); } + // CHECK-LABEL: @vreplgr2vr_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v4i32 vreplgr2vr_w(int _1) { return __lsx_vreplgr2vr_w(_1); } + // CHECK-LABEL: @vreplgr2vr_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v2i64 vreplgr2vr_d(long _1) { return __lsx_vreplgr2vr_d(_1); } + // CHECK-LABEL: @vpcnt_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vpcnt_b(v16i8 _1) { return __lsx_vpcnt_b(_1); } + // CHECK-LABEL: @vpcnt_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vpcnt_h(v8i16 _1) { return __lsx_vpcnt_h(_1); } + // CHECK-LABEL: @vpcnt_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vpcnt_w(v4i32 _1) { 
return __lsx_vpcnt_w(_1); } + // CHECK-LABEL: @vpcnt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vpcnt_d(v2i64 _1) { return __lsx_vpcnt_d(_1); } + // CHECK-LABEL: @vclo_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vclo_b(v16i8 _1) { return __lsx_vclo_b(_1); } + // CHECK-LABEL: @vclo_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vclo_h(v8i16 _1) { return __lsx_vclo_h(_1); } + // CHECK-LABEL: @vclo_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vclo_w(v4i32 _1) { return __lsx_vclo_w(_1); } + // CHECK-LABEL: @vclo_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vclo_d(v2i64 _1) { return __lsx_vclo_d(_1); } + // CHECK-LABEL: @vclz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vclz_b(v16i8 _1) { return __lsx_vclz_b(_1); } + // CHECK-LABEL: @vclz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x 
i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vclz_h(v8i16 _1) { return __lsx_vclz_h(_1); } + // CHECK-LABEL: @vclz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vclz_w(v4i32 _1) { return __lsx_vclz_w(_1); } + // CHECK-LABEL: @vclz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vclz_d(v2i64 _1) { return __lsx_vclz_d(_1); } + // CHECK-LABEL: @vpickve2gr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int vpickve2gr_b(v16i8 _1) { return __lsx_vpickve2gr_b(_1, 1); } + // CHECK-LABEL: @vpickve2gr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int vpickve2gr_h(v8i16 _1) { return __lsx_vpickve2gr_h(_1, 1); } + // CHECK-LABEL: @vpickve2gr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int vpickve2gr_w(v4i32 _1) { return __lsx_vpickve2gr_w(_1, 1); } + // CHECK-LABEL: @vpickve2gr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i64 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: ret i64 [[TMP1]] + // + long vpickve2gr_d(v2i64 _1) { return __lsx_vpickve2gr_d(_1, 1); } + // CHECK-LABEL: @vpickve2gr_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> 
[[TMP0]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + unsigned int vpickve2gr_bu(v16i8 _1) { return __lsx_vpickve2gr_bu(_1, 1); } + // CHECK-LABEL: @vpickve2gr_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + unsigned int vpickve2gr_hu(v8i16 _1) { return __lsx_vpickve2gr_hu(_1, 1); } + // CHECK-LABEL: @vpickve2gr_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + unsigned int vpickve2gr_wu(v4i32 _1) { return __lsx_vpickve2gr_wu(_1, 1); } + // CHECK-LABEL: @vpickve2gr_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i64 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: ret i64 [[TMP1]] + // + unsigned long int vpickve2gr_du(v2i64 _1) { return __lsx_vpickve2gr_du(_1, 1); } + // CHECK-LABEL: @vinsgr2vr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> [[_1:%.*]], i32 1, i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> [[TMP0]], i32 1, i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vinsgr2vr_b(v16i8 _1) { return __lsx_vinsgr2vr_b(_1, 1, 1); } + // CHECK-LABEL: @vinsgr2vr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> [[_1:%.*]], i32 1, i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> [[TMP0]], i32 1, i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vinsgr2vr_h(v8i16 _1) { return __lsx_vinsgr2vr_h(_1, 1, 1); } + // CHECK-LABEL: @vinsgr2vr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> [[_1:%.*]], i32 1, i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> [[TMP0]], i32 1, i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vinsgr2vr_w(v4i32 _1) { return __lsx_vinsgr2vr_w(_1, 1, 1); } + // CHECK-LABEL: @vinsgr2vr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> 
[[_1:%.*]], i64 1, i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> [[TMP0]], i64 1, i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vinsgr2vr_d(v2i64 _1) { return __lsx_vinsgr2vr_d(_1, 1, 1); } + // CHECK-LABEL: @vfadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfadd_s(v4f32 _1, v4f32 _2) { return __lsx_vfadd_s(_1, _2); } + // CHECK-LABEL: @vfadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2f64 vfadd_d(v2f64 _1, v2f64 _2) { return __lsx_vfadd_d(_1, _2); } + // CHECK-LABEL: @vfsub_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfsub_s(v4f32 _1, v4f32 _2) { return __lsx_vfsub_s(_1, _2); } + // CHECK-LABEL: @vfsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2f64 vfsub_d(v2f64 _1, v2f64 _2) { return __lsx_vfsub_d(_1, _2); } + // CHECK-LABEL: @vfmul_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 
[[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfmul_s(v4f32 _1, v4f32 _2) { return __lsx_vfmul_s(_1, _2); } + // CHECK-LABEL: @vfmul_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2f64 vfmul_d(v2f64 _1, v2f64 _2) { return __lsx_vfmul_d(_1, _2); } + // CHECK-LABEL: @vfdiv_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfdiv_s(v4f32 _1, v4f32 _2) { return __lsx_vfdiv_s(_1, _2); } + // CHECK-LABEL: @vfdiv_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2f64 vfdiv_d(v2f64 _1, v2f64 _2) { return __lsx_vfdiv_d(_1, _2); } + // CHECK-LABEL: @vfcvt_h_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vfcvt_h_s(v4f32 _1, v4f32 _2) { return __lsx_vfcvt_h_s(_1, _2); } + // CHECK-LABEL: @vfcvt_s_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> 
@llvm.loongarch.lsx.vfcvt.s.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfcvt_s_d(v2f64 _1, v2f64 _2) { return __lsx_vfcvt_s_d(_1, _2); } + // CHECK-LABEL: @vfmin_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfmin_s(v4f32 _1, v4f32 _2) { return __lsx_vfmin_s(_1, _2); } + // CHECK-LABEL: @vfmin_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2f64 vfmin_d(v2f64 _1, v2f64 _2) { return __lsx_vfmin_d(_1, _2); } + // CHECK-LABEL: @vfmina_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfmina_s(v4f32 _1, v4f32 _2) { return __lsx_vfmina_s(_1, _2); } + // CHECK-LABEL: @vfmina_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2f64 vfmina_d(v2f64 _1, v2f64 _2) { return __lsx_vfmina_d(_1, _2); } + // CHECK-LABEL: @vfmax_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: 
[[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfmax_s(v4f32 _1, v4f32 _2) { return __lsx_vfmax_s(_1, _2); } + // CHECK-LABEL: @vfmax_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2f64 vfmax_d(v2f64 _1, v2f64 _2) { return __lsx_vfmax_d(_1, _2); } + // CHECK-LABEL: @vfmaxa_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfmaxa_s(v4f32 _1, v4f32 _2) { return __lsx_vfmaxa_s(_1, _2); } + // CHECK-LABEL: @vfmaxa_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2f64 vfmaxa_d(v2f64 _1, v2f64 _2) { return __lsx_vfmaxa_d(_1, _2); } + // CHECK-LABEL: @vfclass_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vfclass_s(v4f32 _1) { return __lsx_vfclass_s(_1); } + // CHECK-LABEL: @vfclass_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vfclass_d(v2f64 _1) { return __lsx_vfclass_d(_1); } + // CHECK-LABEL: @vfsqrt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// 
CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vfsqrt_s(v4f32 _1) { return __lsx_vfsqrt_s(_1); } + // CHECK-LABEL: @vfsqrt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vfsqrt_d(v2f64 _1) { return __lsx_vfsqrt_d(_1); } + // CHECK-LABEL: @vfrecip_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vfrecip_s(v4f32 _1) { return __lsx_vfrecip_s(_1); } + // CHECK-LABEL: @vfrecip_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vfrecip_d(v2f64 _1) { return __lsx_vfrecip_d(_1); } + // CHECK-LABEL: @vfrint_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vfrint_s(v4f32 _1) { return __lsx_vfrint_s(_1); } + // CHECK-LABEL: @vfrint_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vfrint_d(v2f64 _1) { return __lsx_vfrint_d(_1); } + // CHECK-LABEL: @vfrsqrt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] 
= bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vfrsqrt_s(v4f32 _1) { return __lsx_vfrsqrt_s(_1); } + // CHECK-LABEL: @vfrsqrt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vfrsqrt_d(v2f64 _1) { return __lsx_vfrsqrt_d(_1); } + // CHECK-LABEL: @vflogb_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vflogb_s(v4f32 _1) { return __lsx_vflogb_s(_1); } + // CHECK-LABEL: @vflogb_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vflogb_d(v2f64 _1) { return __lsx_vflogb_d(_1); } + // CHECK-LABEL: @vfcvth_s_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vfcvth_s_h(v8i16 _1) { return __lsx_vfcvth_s_h(_1); } + // CHECK-LABEL: @vfcvth_d_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vfcvth_d_s(v4f32 _1) { return __lsx_vfcvth_d_s(_1); } + // CHECK-LABEL: @vfcvtl_s_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vfcvtl_s_h(v8i16 _1) { return __lsx_vfcvtl_s_h(_1); } + // CHECK-LABEL: @vfcvtl_d_s( + // CHECK-NEXT: entry: +-// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vfcvtl_d_s(v4f32 _1) { return __lsx_vfcvtl_d_s(_1); } + // CHECK-LABEL: @vftint_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vftint_w_s(v4f32 _1) { return __lsx_vftint_w_s(_1); } + // CHECK-LABEL: @vftint_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftint_l_d(v2f64 _1) { return __lsx_vftint_l_d(_1); } + // CHECK-LABEL: @vftint_wu_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vftint_wu_s(v4f32 _1) { return __lsx_vftint_wu_s(_1); } + // CHECK-LABEL: @vftint_lu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vftint_lu_d(v2f64 _1) { return __lsx_vftint_lu_d(_1); } + // CHECK-LABEL: @vftintrz_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vftintrz_w_s(v4f32 _1) { return __lsx_vftintrz_w_s(_1); } + // CHECK-LABEL: @vftintrz_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 
[[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrz_l_d(v2f64 _1) { return __lsx_vftintrz_l_d(_1); } + // CHECK-LABEL: @vftintrz_wu_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vftintrz_wu_s(v4f32 _1) { return __lsx_vftintrz_wu_s(_1); } + // CHECK-LABEL: @vftintrz_lu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vftintrz_lu_d(v2f64 _1) { return __lsx_vftintrz_lu_d(_1); } + // CHECK-LABEL: @vffint_s_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vffint_s_w(v4i32 _1) { return __lsx_vffint_s_w(_1); } + // CHECK-LABEL: @vffint_d_l( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vffint_d_l(v2i64 _1) { return __lsx_vffint_d_l(_1); } + // CHECK-LABEL: @vffint_s_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vffint_s_wu(v4u32 _1) { return __lsx_vffint_s_wu(_1); } + // CHECK-LABEL: @vffint_d_lu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] 
= bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vffint_d_lu(v2u64 _1) { return __lsx_vffint_d_lu(_1); } + // CHECK-LABEL: @vandn_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vandn_v(v16u8 _1, v16u8 _2) { return __lsx_vandn_v(_1, _2); } + // CHECK-LABEL: @vneg_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vneg_b(v16i8 _1) { return __lsx_vneg_b(_1); } + // CHECK-LABEL: @vneg_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vneg_h(v8i16 _1) { return __lsx_vneg_h(_1); } + // CHECK-LABEL: @vneg_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vneg_w(v4i32 _1) { return __lsx_vneg_w(_1); } + // CHECK-LABEL: @vneg_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vneg_d(v2i64 _1) { return __lsx_vneg_d(_1); } + // CHECK-LABEL: @vmuh_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vmuh_b(v16i8 _1, v16i8 _2) { 
return __lsx_vmuh_b(_1, _2); } + // CHECK-LABEL: @vmuh_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmuh_h(v8i16 _1, v8i16 _2) { return __lsx_vmuh_h(_1, _2); } + // CHECK-LABEL: @vmuh_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmuh_w(v4i32 _1, v4i32 _2) { return __lsx_vmuh_w(_1, _2); } + // CHECK-LABEL: @vmuh_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmuh_d(v2i64 _1, v2i64 _2) { return __lsx_vmuh_d(_1, _2); } + // CHECK-LABEL: @vmuh_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vmuh_bu(v16u8 _1, v16u8 _2) { return __lsx_vmuh_bu(_1, _2); } + // CHECK-LABEL: @vmuh_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vmuh_hu(v8u16 _1, v8u16 _2) { return __lsx_vmuh_hu(_1, _2); } + // CHECK-LABEL: @vmuh_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> 
[[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vmuh_wu(v4u32 _1, v4u32 _2) { return __lsx_vmuh_wu(_1, _2); } + // CHECK-LABEL: @vmuh_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vmuh_du(v2u64 _1, v2u64 _2) { return __lsx_vmuh_du(_1, _2); } + // CHECK-LABEL: @vsllwil_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vsllwil_h_b(v16i8 _1) { return __lsx_vsllwil_h_b(_1, 1); } + // CHECK-LABEL: @vsllwil_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vsllwil_w_h(v8i16 _1) { return __lsx_vsllwil_w_h(_1, 1); } + // CHECK-LABEL: @vsllwil_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vsllwil_d_w(v4i32 _1) { return __lsx_vsllwil_d_w(_1, 1); } + // CHECK-LABEL: @vsllwil_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8u16 vsllwil_hu_bu(v16u8 _1) { return __lsx_vsllwil_hu_bu(_1, 1); } + // CHECK-LABEL: @vsllwil_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> 
@llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vsllwil_wu_hu(v8u16 _1) { return __lsx_vsllwil_wu_hu(_1, 1); } + // CHECK-LABEL: @vsllwil_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vsllwil_du_wu(v4u32 _1) { return __lsx_vsllwil_du_wu(_1, 1); } + // CHECK-LABEL: @vsran_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsran_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsran_b_h(_1, _2); } + // CHECK-LABEL: @vsran_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsran_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsran_h_w(_1, _2); } + // CHECK-LABEL: @vsran_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsran_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsran_w_d(_1, _2); } + // CHECK-LABEL: @vssran_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> [[TMP0]], <8 
x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssran_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssran_b_h(_1, _2); } + // CHECK-LABEL: @vssran_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssran_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssran_h_w(_1, _2); } + // CHECK-LABEL: @vssran_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssran_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssran_w_d(_1, _2); } + // CHECK-LABEL: @vssran_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssran_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssran_bu_h(_1, _2); } + // CHECK-LABEL: @vssran_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssran_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssran_hu_w(_1, _2); } + // CHECK-LABEL: @vssran_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 
vssran_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssran_wu_d(_1, _2); } + // CHECK-LABEL: @vsrarn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrarn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrarn_b_h(_1, _2); } + // CHECK-LABEL: @vsrarn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrarn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrarn_h_w(_1, _2); } + // CHECK-LABEL: @vsrarn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrarn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrarn_w_d(_1, _2); } + // CHECK-LABEL: @vssrarn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssrarn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrarn_b_h(_1, _2); } + // CHECK-LABEL: @vssrarn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssrarn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrarn_h_w(_1, _2); } + // CHECK-LABEL: @vssrarn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssrarn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrarn_w_d(_1, _2); } + // CHECK-LABEL: @vssrarn_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssrarn_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrarn_bu_h(_1, _2); } + // CHECK-LABEL: @vssrarn_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssrarn_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrarn_hu_w(_1, _2); } + // CHECK-LABEL: @vssrarn_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssrarn_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrarn_wu_d(_1, _2); } + // CHECK-LABEL: @vsrln_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrln_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrln_b_h(_1, _2); } + // CHECK-LABEL: @vsrln_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: 
ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrln_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrln_h_w(_1, _2); } + // CHECK-LABEL: @vsrln_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrln_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrln_w_d(_1, _2); } + // CHECK-LABEL: @vssrln_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssrln_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrln_bu_h(_1, _2); } + // CHECK-LABEL: @vssrln_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssrln_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrln_hu_w(_1, _2); } + // CHECK-LABEL: @vssrln_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssrln_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrln_wu_d(_1, _2); } + // CHECK-LABEL: @vsrlrn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 
[[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrlrn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrlrn_b_h(_1, _2); } + // CHECK-LABEL: @vsrlrn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrlrn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrlrn_h_w(_1, _2); } + // CHECK-LABEL: @vsrlrn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrlrn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrlrn_w_d(_1, _2); } + // CHECK-LABEL: @vssrlrn_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssrlrn_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrlrn_bu_h(_1, _2); } + // CHECK-LABEL: @vssrlrn_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssrlrn_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrlrn_hu_w(_1, _2); } + // CHECK-LABEL: @vssrlrn_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> 
[[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssrlrn_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrlrn_wu_d(_1, _2); } + // CHECK-LABEL: @vfrstpi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vfrstpi_b(v16i8 _1, v16i8 _2) { return __lsx_vfrstpi_b(_1, _2, 1); } + // CHECK-LABEL: @vfrstpi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vfrstpi_h(v8i16 _1, v8i16 _2) { return __lsx_vfrstpi_h(_1, _2, 1); } + // CHECK-LABEL: @vfrstp_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v16i8 vfrstp_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __lsx_vfrstp_b(_1, _2, _3); + } + // CHECK-LABEL: @vfrstp_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8i16 vfrstp_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __lsx_vfrstp_h(_1, _2, _3); + } + // CHECK-LABEL: @vshuf4i_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = 
bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vshuf4i_d(v2i64 _1, v2i64 _2) { return __lsx_vshuf4i_d(_1, _2, 1); } + // CHECK-LABEL: @vbsrl_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vbsrl_v(v16i8 _1) { return __lsx_vbsrl_v(_1, 1); } + // CHECK-LABEL: @vbsll_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vbsll_v(v16i8 _1) { return __lsx_vbsll_v(_1, 1); } + // CHECK-LABEL: @vextrins_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vextrins_b(v16i8 _1, v16i8 _2) { return __lsx_vextrins_b(_1, _2, 1); } + // CHECK-LABEL: @vextrins_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vextrins_h(v8i16 _1, v8i16 _2) { return __lsx_vextrins_h(_1, _2, 1); } + // CHECK-LABEL: @vextrins_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vextrins_w(v4i32 _1, v4i32 _2) { return __lsx_vextrins_w(_1, _2, 1); } + // 
CHECK-LABEL: @vextrins_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vextrins_d(v2i64 _1, v2i64 _2) { return __lsx_vextrins_d(_1, _2, 1); } + // CHECK-LABEL: @vmskltz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vmskltz_b(v16i8 _1) { return __lsx_vmskltz_b(_1); } + // CHECK-LABEL: @vmskltz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vmskltz_h(v8i16 _1) { return __lsx_vmskltz_h(_1); } + // CHECK-LABEL: @vmskltz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vmskltz_w(v4i32 _1) { return __lsx_vmskltz_w(_1); } + // CHECK-LABEL: @vmskltz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vmskltz_d(v2i64 _1) { return __lsx_vmskltz_d(_1); } + // CHECK-LABEL: @vsigncov_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsigncov_b(v16i8 _1, v16i8 _2) { return __lsx_vsigncov_b(_1, _2); } + // 
CHECK-LABEL: @vsigncov_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsigncov_h(v8i16 _1, v8i16 _2) { return __lsx_vsigncov_h(_1, _2); } + // CHECK-LABEL: @vsigncov_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsigncov_w(v4i32 _1, v4i32 _2) { return __lsx_vsigncov_w(_1, _2); } + // CHECK-LABEL: @vsigncov_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsigncov_d(v2i64 _1, v2i64 _2) { return __lsx_vsigncov_d(_1, _2); } + // CHECK-LABEL: @vfmadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4f32 vfmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { + return __lsx_vfmadd_s(_1, _2, _3); + } + // CHECK-LABEL: @vfmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 
x double> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2f64 vfmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { + return __lsx_vfmadd_d(_1, _2, _3); + } + // CHECK-LABEL: @vfmsub_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4f32 vfmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { + return __lsx_vfmsub_s(_1, _2, _3); + } + // CHECK-LABEL: @vfmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2f64 vfmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { + return __lsx_vfmsub_d(_1, _2, _3); + } + // CHECK-LABEL: @vfnmadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4f32 vfnmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { + return __lsx_vfnmadd_s(_1, _2, _3); + } + // CHECK-LABEL: @vfnmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2f64 vfnmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { + return __lsx_vfnmadd_d(_1, _2, _3); + } + // CHECK-LABEL: @vfnmsub_s( + 
// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4f32 vfnmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { + return __lsx_vfnmsub_s(_1, _2, _3); + } + // CHECK-LABEL: @vfnmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2f64 vfnmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { + return __lsx_vfnmsub_d(_1, _2, _3); + } + // CHECK-LABEL: @vftintrne_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vftintrne_w_s(v4f32 _1) { return __lsx_vftintrne_w_s(_1); } + // CHECK-LABEL: @vftintrne_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrne_l_d(v2f64 _1) { return __lsx_vftintrne_l_d(_1); } + // CHECK-LABEL: @vftintrp_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vftintrp_w_s(v4f32 _1) { return __lsx_vftintrp_w_s(_1); } + // CHECK-LABEL: @vftintrp_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x 
i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrp_l_d(v2f64 _1) { return __lsx_vftintrp_l_d(_1); } + // CHECK-LABEL: @vftintrm_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vftintrm_w_s(v4f32 _1) { return __lsx_vftintrm_w_s(_1); } + // CHECK-LABEL: @vftintrm_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrm_l_d(v2f64 _1) { return __lsx_vftintrm_l_d(_1); } + // CHECK-LABEL: @vftint_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vftint_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftint_w_d(_1, _2); } + // CHECK-LABEL: @vffint_s_l( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vffint_s_l(v2i64 _1, v2i64 _2) { return __lsx_vffint_s_l(_1, _2); } + // CHECK-LABEL: @vftintrz_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vftintrz_w_d(v2f64 _1, 
v2f64 _2) { return __lsx_vftintrz_w_d(_1, _2); } + // CHECK-LABEL: @vftintrp_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vftintrp_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrp_w_d(_1, _2); } + // CHECK-LABEL: @vftintrm_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vftintrm_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrm_w_d(_1, _2); } + // CHECK-LABEL: @vftintrne_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vftintrne_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrne_w_d(_1, _2); } + // CHECK-LABEL: @vftintl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintl_l_s(v4f32 _1) { return __lsx_vftintl_l_s(_1); } + // CHECK-LABEL: @vftinth_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftinth_l_s(v4f32 _1) { return __lsx_vftinth_l_s(_1); } + // CHECK-LABEL: @vffinth_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 
[[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vffinth_d_w(v4i32 _1) { return __lsx_vffinth_d_w(_1); } + // CHECK-LABEL: @vffintl_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vffintl_d_w(v4i32 _1) { return __lsx_vffintl_d_w(_1); } + // CHECK-LABEL: @vftintrzl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrzl_l_s(v4f32 _1) { return __lsx_vftintrzl_l_s(_1); } + // CHECK-LABEL: @vftintrzh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrzh_l_s(v4f32 _1) { return __lsx_vftintrzh_l_s(_1); } + // CHECK-LABEL: @vftintrpl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrpl_l_s(v4f32 _1) { return __lsx_vftintrpl_l_s(_1); } + // CHECK-LABEL: @vftintrph_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrph_l_s(v4f32 _1) { return __lsx_vftintrph_l_s(_1); } + // CHECK-LABEL: @vftintrml_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> [[TMP0]]) ++// 
CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrml_l_s(v4f32 _1) { return __lsx_vftintrml_l_s(_1); } + // CHECK-LABEL: @vftintrmh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrmh_l_s(v4f32 _1) { return __lsx_vftintrmh_l_s(_1); } + // CHECK-LABEL: @vftintrnel_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrnel_l_s(v4f32 _1) { return __lsx_vftintrnel_l_s(_1); } + // CHECK-LABEL: @vftintrneh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrneh_l_s(v4f32 _1) { return __lsx_vftintrneh_l_s(_1); } + // CHECK-LABEL: @vfrintrne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> +-// CHECK-NEXT: ret <4 x i32> [[TMP1]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vfrintrne_s(v4f32 _1) { return __lsx_vfrintrne_s(_1); } + // CHECK-LABEL: @vfrintrne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> +-// CHECK-NEXT: ret <2 x i64> [[TMP1]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vfrintrne_d(v2f64 _1) { return __lsx_vfrintrne_d(_1); } + // CHECK-LABEL: @vfrintrz_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> +-// CHECK-NEXT: ret <4 x i32> [[TMP1]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: 
[[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vfrintrz_s(v4f32 _1) { return __lsx_vfrintrz_s(_1); } + // CHECK-LABEL: @vfrintrz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> +-// CHECK-NEXT: ret <2 x i64> [[TMP1]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vfrintrz_d(v2f64 _1) { return __lsx_vfrintrz_d(_1); } + // CHECK-LABEL: @vfrintrp_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> +-// CHECK-NEXT: ret <4 x i32> [[TMP1]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vfrintrp_s(v4f32 _1) { return __lsx_vfrintrp_s(_1); } + // CHECK-LABEL: @vfrintrp_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> +-// CHECK-NEXT: ret <2 x i64> [[TMP1]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vfrintrp_d(v2f64 _1) { return __lsx_vfrintrp_d(_1); } + // CHECK-LABEL: @vfrintrm_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> +-// CHECK-NEXT: ret <4 x i32> [[TMP1]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vfrintrm_s(v4f32 _1) { return __lsx_vfrintrm_s(_1); } + // CHECK-LABEL: @vfrintrm_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> +-// CHECK-NEXT: ret <2 x i64> [[TMP1]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vfrintrm_d(v2f64 _1) { return __lsx_vfrintrm_d(_1); } + // CHECK-LABEL: @vstelm_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void 
@llvm.loongarch.lsx.vstelm.b(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1, i32 1) ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i32 1, i32 1) + // CHECK-NEXT: ret void + // + void vstelm_b(v16i8 _1, void *_2) { return __lsx_vstelm_b(_1, _2, 1, 1); } + // CHECK-LABEL: @vstelm_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> [[_1:%.*]], ptr [[_2:%.*]], i32 2, i32 1) ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> [[TMP0]], ptr [[_2:%.*]], i32 2, i32 1) + // CHECK-NEXT: ret void + // + void vstelm_h(v8i16 _1, void *_2) { return __lsx_vstelm_h(_1, _2, 2, 1); } + // CHECK-LABEL: @vstelm_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> [[_1:%.*]], ptr [[_2:%.*]], i32 4, i32 1) ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> [[TMP0]], ptr [[_2:%.*]], i32 4, i32 1) + // CHECK-NEXT: ret void + // + void vstelm_w(v4i32 _1, void *_2) { return __lsx_vstelm_w(_1, _2, 4, 1); } + // CHECK-LABEL: @vstelm_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> [[_1:%.*]], ptr [[_2:%.*]], i32 8, i32 1) ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> [[TMP0]], ptr [[_2:%.*]], i32 8, i32 1) + // CHECK-NEXT: ret void + // + void vstelm_d(v2i64 _1, void *_2) { return __lsx_vstelm_d(_1, _2, 8, 1); } + // CHECK-LABEL: @vaddwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vaddwev_d_w(_1, _2); } + // CHECK-LABEL: @vaddwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vaddwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vaddwev_w_h(_1, _2); } + // CHECK-LABEL: @vaddwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x 
i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vaddwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vaddwev_h_b(_1, _2); } + // CHECK-LABEL: @vaddwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vaddwod_d_w(_1, _2); } + // CHECK-LABEL: @vaddwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vaddwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vaddwod_w_h(_1, _2); } + // CHECK-LABEL: @vaddwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vaddwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vaddwod_h_b(_1, _2); } + // CHECK-LABEL: @vaddwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vaddwev_d_wu(_1, _2); } + // CHECK-LABEL: @vaddwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x 
i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vaddwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vaddwev_w_hu(_1, _2); } + // CHECK-LABEL: @vaddwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vaddwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vaddwev_h_bu(_1, _2); } + // CHECK-LABEL: @vaddwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vaddwod_d_wu(_1, _2); } + // CHECK-LABEL: @vaddwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vaddwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vaddwod_w_hu(_1, _2); } + // CHECK-LABEL: @vaddwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vaddwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vaddwod_h_bu(_1, _2); } + // CHECK-LABEL: @vaddwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwev_d_wu_w(v4u32 _1, 
v4i32 _2) { + return __lsx_vaddwev_d_wu_w(_1, _2); + } + // CHECK-LABEL: @vaddwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vaddwev_w_hu_h(v8u16 _1, v8i16 _2) { + return __lsx_vaddwev_w_hu_h(_1, _2); + } + // CHECK-LABEL: @vaddwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vaddwev_h_bu_b(v16u8 _1, v16i8 _2) { + return __lsx_vaddwev_h_bu_b(_1, _2); + } + // CHECK-LABEL: @vaddwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwod_d_wu_w(v4u32 _1, v4i32 _2) { + return __lsx_vaddwod_d_wu_w(_1, _2); + } + // CHECK-LABEL: @vaddwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vaddwod_w_hu_h(v8u16 _1, v8i16 _2) { + return __lsx_vaddwod_w_hu_h(_1, _2); + } + // CHECK-LABEL: @vaddwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vaddwod_h_bu_b(v16u8 _1, v16i8 _2) { + return 
__lsx_vaddwod_h_bu_b(_1, _2); + } + // CHECK-LABEL: @vsubwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsubwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vsubwev_d_w(_1, _2); } + // CHECK-LABEL: @vsubwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsubwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vsubwev_w_h(_1, _2); } + // CHECK-LABEL: @vsubwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsubwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vsubwev_h_b(_1, _2); } + // CHECK-LABEL: @vsubwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsubwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vsubwod_d_w(_1, _2); } + // CHECK-LABEL: @vsubwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsubwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vsubwod_w_h(_1, _2); } + // CHECK-LABEL: @vsubwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail 
call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsubwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vsubwod_h_b(_1, _2); } + // CHECK-LABEL: @vsubwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsubwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vsubwev_d_wu(_1, _2); } + // CHECK-LABEL: @vsubwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsubwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vsubwev_w_hu(_1, _2); } + // CHECK-LABEL: @vsubwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsubwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vsubwev_h_bu(_1, _2); } + // CHECK-LABEL: @vsubwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsubwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vsubwod_d_wu(_1, _2); } + // CHECK-LABEL: @vsubwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: 
ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsubwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vsubwod_w_hu(_1, _2); } + // CHECK-LABEL: @vsubwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsubwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vsubwod_h_bu(_1, _2); } + // CHECK-LABEL: @vaddwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vaddwev_q_d(_1, _2); } + // CHECK-LABEL: @vaddwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vaddwod_q_d(_1, _2); } + // CHECK-LABEL: @vaddwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vaddwev_q_du(_1, _2); } + // CHECK-LABEL: @vaddwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// 
CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vaddwod_q_du(_1, _2); } + // CHECK-LABEL: @vsubwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsubwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vsubwev_q_d(_1, _2); } + // CHECK-LABEL: @vsubwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsubwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vsubwod_q_d(_1, _2); } + // CHECK-LABEL: @vsubwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsubwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vsubwev_q_du(_1, _2); } + // CHECK-LABEL: @vsubwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsubwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vsubwod_q_du(_1, _2); } + // CHECK-LABEL: @vaddwev_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail 
call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwev_q_du_d(v2u64 _1, v2i64 _2) { + return __lsx_vaddwev_q_du_d(_1, _2); + } + // CHECK-LABEL: @vaddwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwod_q_du_d(v2u64 _1, v2i64 _2) { + return __lsx_vaddwod_q_du_d(_1, _2); + } + // CHECK-LABEL: @vmulwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vmulwev_d_w(_1, _2); } + // CHECK-LABEL: @vmulwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmulwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vmulwev_w_h(_1, _2); } + // CHECK-LABEL: @vmulwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmulwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vmulwev_h_b(_1, _2); } + // CHECK-LABEL: @vmulwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// 
CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vmulwod_d_w(_1, _2); } + // CHECK-LABEL: @vmulwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmulwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vmulwod_w_h(_1, _2); } + // CHECK-LABEL: @vmulwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmulwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vmulwod_h_b(_1, _2); } + // CHECK-LABEL: @vmulwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vmulwev_d_wu(_1, _2); } + // CHECK-LABEL: @vmulwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmulwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vmulwev_w_hu(_1, _2); } + // CHECK-LABEL: @vmulwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 
vmulwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vmulwev_h_bu(_1, _2); } + // CHECK-LABEL: @vmulwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vmulwod_d_wu(_1, _2); } + // CHECK-LABEL: @vmulwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmulwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vmulwod_w_hu(_1, _2); } + // CHECK-LABEL: @vmulwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmulwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vmulwod_h_bu(_1, _2); } + // CHECK-LABEL: @vmulwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwev_d_wu_w(v4u32 _1, v4i32 _2) { + return __lsx_vmulwev_d_wu_w(_1, _2); + } + // CHECK-LABEL: @vmulwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmulwev_w_hu_h(v8u16 _1, v8i16 _2) { + return __lsx_vmulwev_w_hu_h(_1, _2); + } + // 
CHECK-LABEL: @vmulwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmulwev_h_bu_b(v16u8 _1, v16i8 _2) { + return __lsx_vmulwev_h_bu_b(_1, _2); + } + // CHECK-LABEL: @vmulwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwod_d_wu_w(v4u32 _1, v4i32 _2) { + return __lsx_vmulwod_d_wu_w(_1, _2); + } + // CHECK-LABEL: @vmulwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmulwod_w_hu_h(v8u16 _1, v8i16 _2) { + return __lsx_vmulwod_w_hu_h(_1, _2); + } + // CHECK-LABEL: @vmulwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmulwod_h_bu_b(v16u8 _1, v16i8 _2) { + return __lsx_vmulwod_h_bu_b(_1, _2); + } + // CHECK-LABEL: @vmulwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vmulwev_q_d(_1, _2); } + // CHECK-LABEL: @vmulwod_q_d( + // CHECK-NEXT: entry: 
+-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vmulwod_q_d(_1, _2); } + // CHECK-LABEL: @vmulwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vmulwev_q_du(_1, _2); } + // CHECK-LABEL: @vmulwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vmulwod_q_du(_1, _2); } + // CHECK-LABEL: @vmulwev_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwev_q_du_d(v2u64 _1, v2i64 _2) { + return __lsx_vmulwev_q_du_d(_1, _2); + } + // CHECK-LABEL: @vmulwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwod_q_du_d(v2u64 _1, v2i64 _2) { + return __lsx_vmulwod_q_du_d(_1, _2); + } + // CHECK-LABEL: @vhaddw_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> 
@llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vhaddw_q_d(v2i64 _1, v2i64 _2) { return __lsx_vhaddw_q_d(_1, _2); } + // CHECK-LABEL: @vhaddw_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vhaddw_qu_du(v2u64 _1, v2u64 _2) { return __lsx_vhaddw_qu_du(_1, _2); } + // CHECK-LABEL: @vhsubw_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vhsubw_q_d(v2i64 _1, v2i64 _2) { return __lsx_vhsubw_q_d(_1, _2); } + // CHECK-LABEL: @vhsubw_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vhsubw_qu_du(v2u64 _1, v2u64 _2) { return __lsx_vhsubw_qu_du(_1, _2); } + // CHECK-LABEL: @vmaddwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmaddwev_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { + return __lsx_vmaddwev_d_w(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail 
call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4i32 vmaddwev_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { + return __lsx_vmaddwev_w_h(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8i16 vmaddwev_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { + return __lsx_vmaddwev_h_b(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2u64 vmaddwev_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { + return __lsx_vmaddwev_d_wu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4u32 vmaddwev_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { + return __lsx_vmaddwev_w_hu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// 
CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8u16 vmaddwev_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { + return __lsx_vmaddwev_h_bu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmaddwod_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { + return __lsx_vmaddwod_d_w(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4i32 vmaddwod_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { + return __lsx_vmaddwod_w_h(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8i16 vmaddwod_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { + return __lsx_vmaddwod_h_b(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> 
[[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2u64 vmaddwod_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { + return __lsx_vmaddwod_d_wu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4u32 vmaddwod_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { + return __lsx_vmaddwod_w_hu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8u16 vmaddwod_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { + return __lsx_vmaddwod_h_bu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmaddwev_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { + return __lsx_vmaddwev_d_wu_w(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4i32 vmaddwev_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { + 
return __lsx_vmaddwev_w_hu_h(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8i16 vmaddwev_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { + return __lsx_vmaddwev_h_bu_b(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmaddwod_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { + return __lsx_vmaddwod_d_wu_w(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4i32 vmaddwod_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { + return __lsx_vmaddwod_w_hu_h(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8i16 vmaddwod_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { + return __lsx_vmaddwod_h_bu_b(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 
x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmaddwev_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __lsx_vmaddwev_q_d(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmaddwod_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __lsx_vmaddwod_q_d(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2u64 vmaddwev_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { + return __lsx_vmaddwev_q_du(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2u64 vmaddwod_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { + return __lsx_vmaddwod_q_du(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 
[[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmaddwev_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) { + return __lsx_vmaddwev_q_du_d(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmaddwod_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) { + return __lsx_vmaddwod_q_du_d(_1, _2, _3); + } + // CHECK-LABEL: @vrotr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vrotr_b(v16i8 _1, v16i8 _2) { return __lsx_vrotr_b(_1, _2); } + // CHECK-LABEL: @vrotr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vrotr_h(v8i16 _1, v8i16 _2) { return __lsx_vrotr_h(_1, _2); } + // CHECK-LABEL: @vrotr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vrotr_w(v4i32 _1, v4i32 _2) { return __lsx_vrotr_w(_1, _2); } + // CHECK-LABEL: @vrotr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = 
bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vrotr_d(v2i64 _1, v2i64 _2) { return __lsx_vrotr_d(_1, _2); } + // CHECK-LABEL: @vadd_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vadd_q(v2i64 _1, v2i64 _2) { return __lsx_vadd_q(_1, _2); } + // CHECK-LABEL: @vsub_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsub_q(v2i64 _1, v2i64 _2) { return __lsx_vsub_q(_1, _2); } + // CHECK-LABEL: @vldrepl_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v16i8 vldrepl_b(void *_1) { return __lsx_vldrepl_b(_1, 1); } + // CHECK-LABEL: @vldrepl_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr [[_1:%.*]], i32 2) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v8i16 vldrepl_h(void *_1) { return __lsx_vldrepl_h(_1, 2); } + // CHECK-LABEL: @vldrepl_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr [[_1:%.*]], i32 4) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v4i32 vldrepl_w(void *_1) { return __lsx_vldrepl_w(_1, 4); } + // CHECK-LABEL: @vldrepl_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr [[_1:%.*]], i32 8) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v2i64 vldrepl_d(void *_1) { return __lsx_vldrepl_d(_1, 8); } + // CHECK-LABEL: @vmskgez_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> 
@llvm.loongarch.lsx.vmskgez.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vmskgez_b(v16i8 _1) { return __lsx_vmskgez_b(_1); } + // CHECK-LABEL: @vmsknz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vmsknz_b(v16i8 _1) { return __lsx_vmsknz_b(_1); } + // CHECK-LABEL: @vexth_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vexth_h_b(v16i8 _1) { return __lsx_vexth_h_b(_1); } + // CHECK-LABEL: @vexth_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vexth_w_h(v8i16 _1) { return __lsx_vexth_w_h(_1); } + // CHECK-LABEL: @vexth_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vexth_d_w(v4i32 _1) { return __lsx_vexth_d_w(_1); } + // CHECK-LABEL: @vexth_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vexth_q_d(v2i64 _1) { return __lsx_vexth_q_d(_1); } + // CHECK-LABEL: @vexth_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8u16 vexth_hu_bu(v16u8 _1) { return __lsx_vexth_hu_bu(_1); } + // CHECK-LABEL: @vexth_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vexth_wu_hu(v8u16 _1) { return __lsx_vexth_wu_hu(_1); } + // CHECK-LABEL: @vexth_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vexth_du_wu(v4u32 _1) { return __lsx_vexth_du_wu(_1); } + // CHECK-LABEL: @vexth_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vexth_qu_du(v2u64 _1) { return __lsx_vexth_qu_du(_1); } + // CHECK-LABEL: @vrotri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vrotri_b(v16i8 _1) { return __lsx_vrotri_b(_1, 1); } + // CHECK-LABEL: @vrotri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vrotri_h(v8i16 _1) { return __lsx_vrotri_h(_1, 1); } + // CHECK-LABEL: @vrotri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vrotri_w(v4i32 _1) { return __lsx_vrotri_w(_1, 1); } + // CHECK-LABEL: @vrotri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: 
[[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vrotri_d(v2i64 _1) { return __lsx_vrotri_d(_1, 1); } + // CHECK-LABEL: @vextl_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vextl_q_d(v2i64 _1) { return __lsx_vextl_q_d(_1); } + // CHECK-LABEL: @vsrlni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrlni_b_h(_1, _2, 1); } + // CHECK-LABEL: @vsrlni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrlni_h_w(_1, _2, 1); } + // CHECK-LABEL: @vsrlni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrlni_w_d(_1, _2, 1); } + // CHECK-LABEL: @vsrlni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2) { 
return __lsx_vsrlni_d_q(_1, _2, 1); } + // CHECK-LABEL: @vsrlrni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrlrni_b_h(_1, _2, 1); } + // CHECK-LABEL: @vsrlrni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrlrni_h_w(_1, _2, 1); } + // CHECK-LABEL: @vsrlrni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrlrni_w_d(_1, _2, 1); } + // CHECK-LABEL: @vsrlrni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrlrni_d_q(_1, _2, 1); } + // CHECK-LABEL: @vssrlni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrlni_b_h(_1, _2, 1); } + // 
CHECK-LABEL: @vssrlni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrlni_h_w(_1, _2, 1); } + // CHECK-LABEL: @vssrlni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrlni_w_d(_1, _2, 1); } + // CHECK-LABEL: @vssrlni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrlni_d_q(_1, _2, 1); } + // CHECK-LABEL: @vssrlni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2) { return __lsx_vssrlni_bu_h(_1, _2, 1); } + // CHECK-LABEL: @vssrlni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2) { return __lsx_vssrlni_hu_w(_1, _2, 1); } + // CHECK-LABEL: @vssrlni_wu_d( + // 
CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2) { return __lsx_vssrlni_wu_d(_1, _2, 1); } + // CHECK-LABEL: @vssrlni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2) { return __lsx_vssrlni_du_q(_1, _2, 1); } + // CHECK-LABEL: @vssrlrni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrlrni_b_h(_1, _2, 1); } + // CHECK-LABEL: @vssrlrni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrlrni_h_w(_1, _2, 1); } + // CHECK-LABEL: @vssrlrni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrlrni_w_d(_1, _2, 1); } + // CHECK-LABEL: @vssrlrni_d_q( + // CHECK-NEXT: entry: 
+-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrlrni_d_q(_1, _2, 1); } + // CHECK-LABEL: @vssrlrni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2) { + return __lsx_vssrlrni_bu_h(_1, _2, 1); + } + // CHECK-LABEL: @vssrlrni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2) { + return __lsx_vssrlrni_hu_w(_1, _2, 1); + } + // CHECK-LABEL: @vssrlrni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2) { + return __lsx_vssrlrni_wu_d(_1, _2, 1); + } + // CHECK-LABEL: @vssrlrni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2) { + return __lsx_vssrlrni_du_q(_1, _2, 1); + } + // CHECK-LABEL: @vsrani_b_h( + // 
CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrani_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrani_b_h(_1, _2, 1); } + // CHECK-LABEL: @vsrani_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrani_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrani_h_w(_1, _2, 1); } + // CHECK-LABEL: @vsrani_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrani_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrani_w_d(_1, _2, 1); } + // CHECK-LABEL: @vsrani_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsrani_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrani_d_q(_1, _2, 1); } + // CHECK-LABEL: @vsrarni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrarni_b_h(_1, _2, 1); } + // CHECK-LABEL: @vsrarni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 
x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrarni_h_w(_1, _2, 1); } + // CHECK-LABEL: @vsrarni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrarni_w_d(_1, _2, 1); } + // CHECK-LABEL: @vsrarni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrarni_d_q(_1, _2, 1); } + // CHECK-LABEL: @vssrani_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssrani_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrani_b_h(_1, _2, 1); } + // CHECK-LABEL: @vssrani_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssrani_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrani_h_w(_1, _2, 1); } + // CHECK-LABEL: @vssrani_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x 
i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssrani_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrani_w_d(_1, _2, 1); } + // CHECK-LABEL: @vssrani_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vssrani_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrani_d_q(_1, _2, 1); } + // CHECK-LABEL: @vssrani_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2) { return __lsx_vssrani_bu_h(_1, _2, 1); } + // CHECK-LABEL: @vssrani_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2) { return __lsx_vssrani_hu_w(_1, _2, 1); } + // CHECK-LABEL: @vssrani_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2) { return __lsx_vssrani_wu_d(_1, _2, 1); } + // CHECK-LABEL: @vssrani_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> [[_1:%.*]], <2 x i64> 
[[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vssrani_du_q(v2u64 _1, v2i64 _2) { return __lsx_vssrani_du_q(_1, _2, 1); } + // CHECK-LABEL: @vssrarni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrarni_b_h(_1, _2, 1); } + // CHECK-LABEL: @vssrarni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrarni_h_w(_1, _2, 1); } + // CHECK-LABEL: @vssrarni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrarni_w_d(_1, _2, 1); } + // CHECK-LABEL: @vssrarni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrarni_d_q(_1, _2, 1); } + // CHECK-LABEL: @vssrarni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 
1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2) { + return __lsx_vssrarni_bu_h(_1, _2, 1); + } + // CHECK-LABEL: @vssrarni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2) { + return __lsx_vssrarni_hu_w(_1, _2, 1); + } + // CHECK-LABEL: @vssrarni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2) { + return __lsx_vssrarni_wu_d(_1, _2, 1); + } + // CHECK-LABEL: @vssrarni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2) { + return __lsx_vssrarni_du_q(_1, _2, 1); + } + // CHECK-LABEL: @vpermi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vpermi_w(v4i32 _1, v4i32 _2) { return __lsx_vpermi_w(_1, _2, 1); } + // CHECK-LABEL: @vld( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vld(ptr [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// 
CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v16i8 vld(void *_1) { return __lsx_vld(_1, 1); } + // CHECK-LABEL: @vst( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vst(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1) ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vst(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i32 1) + // CHECK-NEXT: ret void + // + void vst(v16i8 _1, void *_2) { return __lsx_vst(_1, _2, 1); } + // CHECK-LABEL: @vssrlrn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssrlrn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrlrn_b_h(_1, _2); } + // CHECK-LABEL: @vssrlrn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssrlrn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrlrn_h_w(_1, _2); } + // CHECK-LABEL: @vssrlrn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssrlrn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrlrn_w_d(_1, _2); } + // CHECK-LABEL: @vssrln_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssrln_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrln_b_h(_1, _2); } + // CHECK-LABEL: @vssrln_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: 
[[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssrln_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrln_h_w(_1, _2); } + // CHECK-LABEL: @vssrln_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssrln_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrln_w_d(_1, _2); } + // CHECK-LABEL: @vorn_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vorn_v(v16i8 _1, v16i8 _2) { return __lsx_vorn_v(_1, _2); } + // CHECK-LABEL: @vldi( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldi(i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v2i64 vldi() { return __lsx_vldi(1); } + // CHECK-LABEL: @vshuf_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __lsx_vshuf_b(_1, _2, _3); +@@ -4086,366 +5844,516 @@ v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) { + // CHECK-LABEL: @vldx( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldx(ptr [[_1:%.*]], i64 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v16i8 vldx(void *_1) { return __lsx_vldx(_1, 1); } + // CHECK-LABEL: @vstx( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstx(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i64 1) ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// 
CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstx(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i64 1) + // CHECK-NEXT: ret void + // + void vstx(v16i8 _1, void *_2) { return __lsx_vstx(_1, _2, 1); } + // CHECK-LABEL: @vextl_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vextl_qu_du(v2u64 _1) { return __lsx_vextl_qu_du(_1); } + // CHECK-LABEL: @bnz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bnz_b(v16u8 _1) { return __lsx_bnz_b(_1); } + // CHECK-LABEL: @bnz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bnz_d(v2u64 _1) { return __lsx_bnz_d(_1); } + // CHECK-LABEL: @bnz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bnz_h(v8u16 _1) { return __lsx_bnz_h(_1); } + // CHECK-LABEL: @bnz_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bnz_v(v16u8 _1) { return __lsx_bnz_v(_1); } + // CHECK-LABEL: @bnz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bnz_w(v4u32 _1) { return __lsx_bnz_w(_1); } + // CHECK-LABEL: @bz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bz_b(v16u8 _1) { return __lsx_bz_b(_1); } + // CHECK-LABEL: @bz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: 
[[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bz_d(v2u64 _1) { return __lsx_bz_d(_1); } + // CHECK-LABEL: @bz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bz_h(v8u16 _1) { return __lsx_bz_h(_1); } + // CHECK-LABEL: @bz_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bz_v(v16u8 _1) { return __lsx_bz_v(_1); } + // CHECK-LABEL: @bz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bz_w(v4u32 _1) { return __lsx_bz_w(_1); } + // CHECK-LABEL: @vfcmp_caf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_caf_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_caf_d(_1, _2); } + // CHECK-LABEL: @vfcmp_caf_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_caf_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_caf_s(_1, _2); } + // CHECK-LABEL: @vfcmp_ceq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + 
v2i64 vfcmp_ceq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_ceq_d(_1, _2); } + // CHECK-LABEL: @vfcmp_ceq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_ceq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_ceq_s(_1, _2); } + // CHECK-LABEL: @vfcmp_cle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_cle_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cle_d(_1, _2); } + // CHECK-LABEL: @vfcmp_cle_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_cle_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cle_s(_1, _2); } + // CHECK-LABEL: @vfcmp_clt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_clt_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_clt_d(_1, _2); } + // CHECK-LABEL: @vfcmp_clt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_clt_s(v4f32 _1, v4f32 _2) { return 
__lsx_vfcmp_clt_s(_1, _2); } + // CHECK-LABEL: @vfcmp_cne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_cne_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cne_d(_1, _2); } + // CHECK-LABEL: @vfcmp_cne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_cne_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cne_s(_1, _2); } + // CHECK-LABEL: @vfcmp_cor_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_cor_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cor_d(_1, _2); } + // CHECK-LABEL: @vfcmp_cor_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_cor_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cor_s(_1, _2); } + // CHECK-LABEL: @vfcmp_cueq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_cueq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cueq_d(_1, _2); } + // CHECK-LABEL: 
@vfcmp_cueq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_cueq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cueq_s(_1, _2); } + // CHECK-LABEL: @vfcmp_cule_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_cule_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cule_d(_1, _2); } + // CHECK-LABEL: @vfcmp_cule_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_cule_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cule_s(_1, _2); } + // CHECK-LABEL: @vfcmp_cult_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_cult_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cult_d(_1, _2); } + // CHECK-LABEL: @vfcmp_cult_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_cult_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cult_s(_1, _2); } + // CHECK-LABEL: @vfcmp_cun_d( + // CHECK-NEXT: 
entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_cun_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cun_d(_1, _2); } + // CHECK-LABEL: @vfcmp_cune_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_cune_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cune_d(_1, _2); } + // CHECK-LABEL: @vfcmp_cune_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_cune_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cune_s(_1, _2); } + // CHECK-LABEL: @vfcmp_cun_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_cun_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cun_s(_1, _2); } + // CHECK-LABEL: @vfcmp_saf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_saf_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_saf_d(_1, _2); } + // CHECK-LABEL: @vfcmp_saf_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = 
tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_saf_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_saf_s(_1, _2); } + // CHECK-LABEL: @vfcmp_seq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_seq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_seq_d(_1, _2); } + // CHECK-LABEL: @vfcmp_seq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_seq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_seq_s(_1, _2); } + // CHECK-LABEL: @vfcmp_sle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_sle_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sle_d(_1, _2); } + // CHECK-LABEL: @vfcmp_sle_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_sle_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sle_s(_1, _2); } + // CHECK-LABEL: @vfcmp_slt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x 
double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_slt_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_slt_d(_1, _2); } + // CHECK-LABEL: @vfcmp_slt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_slt_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_slt_s(_1, _2); } + // CHECK-LABEL: @vfcmp_sne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_sne_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sne_d(_1, _2); } + // CHECK-LABEL: @vfcmp_sne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_sne_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sne_s(_1, _2); } + // CHECK-LABEL: @vfcmp_sor_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_sor_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sor_d(_1, _2); } + // CHECK-LABEL: @vfcmp_sor_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// 
CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_sor_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sor_s(_1, _2); } + // CHECK-LABEL: @vfcmp_sueq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_sueq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sueq_d(_1, _2); } + // CHECK-LABEL: @vfcmp_sueq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_sueq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sueq_s(_1, _2); } + // CHECK-LABEL: @vfcmp_sule_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_sule_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sule_d(_1, _2); } + // CHECK-LABEL: @vfcmp_sule_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_sule_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sule_s(_1, _2); } + // CHECK-LABEL: @vfcmp_sult_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> 
[[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_sult_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sult_d(_1, _2); } + // CHECK-LABEL: @vfcmp_sult_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_sult_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sult_s(_1, _2); } + // CHECK-LABEL: @vfcmp_sun_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_sun_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sun_d(_1, _2); } + // CHECK-LABEL: @vfcmp_sune_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_sune_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sune_d(_1, _2); } + // CHECK-LABEL: @vfcmp_sune_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_sune_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sune_s(_1, _2); } + // CHECK-LABEL: @vfcmp_sun_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: 
[[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_sun_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sun_s(_1, _2); } + // CHECK-LABEL: @vrepli_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v16i8 vrepli_b() { return __lsx_vrepli_b(1); } + // CHECK-LABEL: @vrepli_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v2i64 vrepli_d() { return __lsx_vrepli_d(1); } + // CHECK-LABEL: @vrepli_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v8i16 vrepli_h() { return __lsx_vrepli_h(1); } + // CHECK-LABEL: @vrepli_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v4i32 vrepli_w() { return __lsx_vrepli_w(1); } +diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin.c b/clang/test/CodeGen/LoongArch/lsx/builtin.c +index ef5a390e1838..05a3d13a7fb9 100644 +--- a/clang/test/CodeGen/LoongArch/lsx/builtin.c ++++ b/clang/test/CodeGen/LoongArch/lsx/builtin.c +@@ -29,3319 +29,4547 @@ typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__)); + + // CHECK-LABEL: @vsll_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsll_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsll_b(_1, _2); } + // CHECK-LABEL: @vsll_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsll_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsll_h(_1, _2); } + // CHECK-LABEL: @vsll_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsll_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsll_w(_1, _2); } + // CHECK-LABEL: @vsll_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsll_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsll_d(_1, _2); } + // CHECK-LABEL: @vslli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vslli_b(v16i8 _1) { return __builtin_lsx_vslli_b(_1, 1); } + // CHECK-LABEL: @vslli_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vslli_h(v8i16 _1) { return __builtin_lsx_vslli_h(_1, 1); } + // CHECK-LABEL: @vslli_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vslli_w(v4i32 _1) { return __builtin_lsx_vslli_w(_1, 1); } + // CHECK-LABEL: @vslli_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vslli_d(v2i64 _1) { return __builtin_lsx_vslli_d(_1, 1); } + // CHECK-LABEL: @vsra_b( + // CHECK-NEXT: entry: 
+-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsra_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsra_b(_1, _2); } + // CHECK-LABEL: @vsra_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsra_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsra_h(_1, _2); } + // CHECK-LABEL: @vsra_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsra_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsra_w(_1, _2); } + // CHECK-LABEL: @vsra_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsra_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsra_d(_1, _2); } + // CHECK-LABEL: @vsrai_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vsrai_b(v16i8 _1) { return __builtin_lsx_vsrai_b(_1, 1); } + // CHECK-LABEL: @vsrai_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> 
[[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vsrai_h(v8i16 _1) { return __builtin_lsx_vsrai_h(_1, 1); } + // CHECK-LABEL: @vsrai_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vsrai_w(v4i32 _1) { return __builtin_lsx_vsrai_w(_1, 1); } + // CHECK-LABEL: @vsrai_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vsrai_d(v2i64 _1) { return __builtin_lsx_vsrai_d(_1, 1); } + // CHECK-LABEL: @vsrar_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrar_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsrar_b(_1, _2); + } + // CHECK-LABEL: @vsrar_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrar_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrar_h(_1, _2); + } + // CHECK-LABEL: @vsrar_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrar_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsrar_w(_1, _2); + } + // CHECK-LABEL: @vsrar_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] 
++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsrar_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsrar_d(_1, _2); + } + // CHECK-LABEL: @vsrari_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vsrari_b(v16i8 _1) { return __builtin_lsx_vsrari_b(_1, 1); } + // CHECK-LABEL: @vsrari_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vsrari_h(v8i16 _1) { return __builtin_lsx_vsrari_h(_1, 1); } + // CHECK-LABEL: @vsrari_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vsrari_w(v4i32 _1) { return __builtin_lsx_vsrari_w(_1, 1); } + // CHECK-LABEL: @vsrari_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vsrari_d(v2i64 _1) { return __builtin_lsx_vsrari_d(_1, 1); } + // CHECK-LABEL: @vsrl_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrl_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsrl_b(_1, _2); } + // CHECK-LABEL: @vsrl_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) 
+-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrl_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsrl_h(_1, _2); } + // CHECK-LABEL: @vsrl_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrl_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsrl_w(_1, _2); } + // CHECK-LABEL: @vsrl_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsrl_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsrl_d(_1, _2); } + // CHECK-LABEL: @vsrli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vsrli_b(v16i8 _1) { return __builtin_lsx_vsrli_b(_1, 1); } + // CHECK-LABEL: @vsrli_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vsrli_h(v8i16 _1) { return __builtin_lsx_vsrli_h(_1, 1); } + // CHECK-LABEL: @vsrli_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vsrli_w(v4i32 _1) { return __builtin_lsx_vsrli_w(_1, 1); } + // CHECK-LABEL: @vsrli_d( + // CHECK-NEXT: 
entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vsrli_d(v2i64 _1) { return __builtin_lsx_vsrli_d(_1, 1); } + // CHECK-LABEL: @vsrlr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrlr_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsrlr_b(_1, _2); + } + // CHECK-LABEL: @vsrlr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrlr_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrlr_h(_1, _2); + } + // CHECK-LABEL: @vsrlr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrlr_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsrlr_w(_1, _2); + } + // CHECK-LABEL: @vsrlr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsrlr_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsrlr_d(_1, _2); + } + // CHECK-LABEL: @vsrlri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> 
@llvm.loongarch.lsx.vsrlri.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vsrlri_b(v16i8 _1) { return __builtin_lsx_vsrlri_b(_1, 1); } + // CHECK-LABEL: @vsrlri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vsrlri_h(v8i16 _1) { return __builtin_lsx_vsrlri_h(_1, 1); } + // CHECK-LABEL: @vsrlri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vsrlri_w(v4i32 _1) { return __builtin_lsx_vsrlri_w(_1, 1); } + // CHECK-LABEL: @vsrlri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vsrlri_d(v2i64 _1) { return __builtin_lsx_vsrlri_d(_1, 1); } + // CHECK-LABEL: @vbitclr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vbitclr_b(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vbitclr_b(_1, _2); + } + // CHECK-LABEL: @vbitclr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vbitclr_h(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vbitclr_h(_1, _2); + } + // CHECK-LABEL: @vbitclr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast 
i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vbitclr_w(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vbitclr_w(_1, _2); + } + // CHECK-LABEL: @vbitclr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vbitclr_d(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vbitclr_d(_1, _2); + } + // CHECK-LABEL: @vbitclri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vbitclri_b(v16u8 _1) { return __builtin_lsx_vbitclri_b(_1, 1); } + // CHECK-LABEL: @vbitclri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8u16 vbitclri_h(v8u16 _1) { return __builtin_lsx_vbitclri_h(_1, 1); } + // CHECK-LABEL: @vbitclri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vbitclri_w(v4u32 _1) { return __builtin_lsx_vbitclri_w(_1, 1); } + // CHECK-LABEL: @vbitclri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vbitclri_d(v2u64 _1) { return __builtin_lsx_vbitclri_d(_1, 1); } + // CHECK-LABEL: @vbitset_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> 
[[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vbitset_b(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vbitset_b(_1, _2); + } + // CHECK-LABEL: @vbitset_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vbitset_h(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vbitset_h(_1, _2); + } + // CHECK-LABEL: @vbitset_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vbitset_w(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vbitset_w(_1, _2); + } + // CHECK-LABEL: @vbitset_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vbitset_d(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vbitset_d(_1, _2); + } + // CHECK-LABEL: @vbitseti_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vbitseti_b(v16u8 _1) { return __builtin_lsx_vbitseti_b(_1, 1); } + // CHECK-LABEL: @vbitseti_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> 
[[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8u16 vbitseti_h(v8u16 _1) { return __builtin_lsx_vbitseti_h(_1, 1); } + // CHECK-LABEL: @vbitseti_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vbitseti_w(v4u32 _1) { return __builtin_lsx_vbitseti_w(_1, 1); } + // CHECK-LABEL: @vbitseti_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vbitseti_d(v2u64 _1) { return __builtin_lsx_vbitseti_d(_1, 1); } + // CHECK-LABEL: @vbitrev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vbitrev_b(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vbitrev_b(_1, _2); + } + // CHECK-LABEL: @vbitrev_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vbitrev_h(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vbitrev_h(_1, _2); + } + // CHECK-LABEL: @vbitrev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vbitrev_w(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vbitrev_w(_1, _2); + } + // CHECK-LABEL: @vbitrev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> 
[[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vbitrev_d(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vbitrev_d(_1, _2); + } + // CHECK-LABEL: @vbitrevi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vbitrevi_b(v16u8 _1) { return __builtin_lsx_vbitrevi_b(_1, 1); } + // CHECK-LABEL: @vbitrevi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8u16 vbitrevi_h(v8u16 _1) { return __builtin_lsx_vbitrevi_h(_1, 1); } + // CHECK-LABEL: @vbitrevi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vbitrevi_w(v4u32 _1) { return __builtin_lsx_vbitrevi_w(_1, 1); } + // CHECK-LABEL: @vbitrevi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vbitrevi_d(v2u64 _1) { return __builtin_lsx_vbitrevi_d(_1, 1); } + // CHECK-LABEL: @vadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vadd_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vadd_b(_1, _2); } + // CHECK-LABEL: @vadd_h( + // CHECK-NEXT: entry: 
+-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vadd_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vadd_h(_1, _2); } + // CHECK-LABEL: @vadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vadd_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vadd_w(_1, _2); } + // CHECK-LABEL: @vadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vadd_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vadd_d(_1, _2); } + // CHECK-LABEL: @vaddi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vaddi_bu(v16i8 _1) { return __builtin_lsx_vaddi_bu(_1, 1); } + // CHECK-LABEL: @vaddi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vaddi_hu(v8i16 _1) { return __builtin_lsx_vaddi_hu(_1, 1); } + // CHECK-LABEL: @vaddi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret 
i128 [[TMP2]] + // + v4i32 vaddi_wu(v4i32 _1) { return __builtin_lsx_vaddi_wu(_1, 1); } + // CHECK-LABEL: @vaddi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vaddi_du(v2i64 _1) { return __builtin_lsx_vaddi_du(_1, 1); } + // CHECK-LABEL: @vsub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsub_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsub_b(_1, _2); } + // CHECK-LABEL: @vsub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsub_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsub_h(_1, _2); } + // CHECK-LABEL: @vsub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsub_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsub_w(_1, _2); } + // CHECK-LABEL: @vsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsub_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsub_d(_1, _2); } + // CHECK-LABEL: @vsubi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: 
[[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vsubi_bu(v16i8 _1) { return __builtin_lsx_vsubi_bu(_1, 1); } + // CHECK-LABEL: @vsubi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vsubi_hu(v8i16 _1) { return __builtin_lsx_vsubi_hu(_1, 1); } + // CHECK-LABEL: @vsubi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vsubi_wu(v4i32 _1) { return __builtin_lsx_vsubi_wu(_1, 1); } + // CHECK-LABEL: @vsubi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vsubi_du(v2i64 _1) { return __builtin_lsx_vsubi_du(_1, 1); } + // CHECK-LABEL: @vmax_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vmax_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmax_b(_1, _2); } + // CHECK-LABEL: @vmax_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmax_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmax_h(_1, _2); } + // CHECK-LABEL: @vmax_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// 
CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmax_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmax_w(_1, _2); } + // CHECK-LABEL: @vmax_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmax_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmax_d(_1, _2); } + // CHECK-LABEL: @vmaxi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vmaxi_b(v16i8 _1) { return __builtin_lsx_vmaxi_b(_1, 1); } + // CHECK-LABEL: @vmaxi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vmaxi_h(v8i16 _1) { return __builtin_lsx_vmaxi_h(_1, 1); } + // CHECK-LABEL: @vmaxi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vmaxi_w(v4i32 _1) { return __builtin_lsx_vmaxi_w(_1, 1); } + // CHECK-LABEL: @vmaxi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vmaxi_d(v2i64 _1) { return __builtin_lsx_vmaxi_d(_1, 1); } + // CHECK-LABEL: @vmax_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> [[_1:%.*]], <16 x i8> 
[[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vmax_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vmax_bu(_1, _2); + } + // CHECK-LABEL: @vmax_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vmax_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vmax_hu(_1, _2); + } + // CHECK-LABEL: @vmax_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vmax_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vmax_wu(_1, _2); + } + // CHECK-LABEL: @vmax_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vmax_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vmax_du(_1, _2); + } + // CHECK-LABEL: @vmaxi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vmaxi_bu(v16u8 _1) { return __builtin_lsx_vmaxi_bu(_1, 1); } + // CHECK-LABEL: @vmaxi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] 
to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8u16 vmaxi_hu(v8u16 _1) { return __builtin_lsx_vmaxi_hu(_1, 1); } + // CHECK-LABEL: @vmaxi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vmaxi_wu(v4u32 _1) { return __builtin_lsx_vmaxi_wu(_1, 1); } + // CHECK-LABEL: @vmaxi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vmaxi_du(v2u64 _1) { return __builtin_lsx_vmaxi_du(_1, 1); } + // CHECK-LABEL: @vmin_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vmin_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmin_b(_1, _2); } + // CHECK-LABEL: @vmin_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmin_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmin_h(_1, _2); } + // CHECK-LABEL: @vmin_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmin_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmin_w(_1, _2); } + // CHECK-LABEL: @vmin_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: 
[[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmin_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmin_d(_1, _2); } + // CHECK-LABEL: @vmini_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vmini_b(v16i8 _1) { return __builtin_lsx_vmini_b(_1, 1); } + // CHECK-LABEL: @vmini_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vmini_h(v8i16 _1) { return __builtin_lsx_vmini_h(_1, 1); } + // CHECK-LABEL: @vmini_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vmini_w(v4i32 _1) { return __builtin_lsx_vmini_w(_1, 1); } + // CHECK-LABEL: @vmini_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vmini_d(v2i64 _1) { return __builtin_lsx_vmini_d(_1, 1); } + // CHECK-LABEL: @vmin_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vmin_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vmin_bu(_1, _2); + } + // CHECK-LABEL: @vmin_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 
x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vmin_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vmin_hu(_1, _2); + } + // CHECK-LABEL: @vmin_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vmin_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vmin_wu(_1, _2); + } + // CHECK-LABEL: @vmin_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vmin_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vmin_du(_1, _2); + } + // CHECK-LABEL: @vmini_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vmini_bu(v16u8 _1) { return __builtin_lsx_vmini_bu(_1, 1); } + // CHECK-LABEL: @vmini_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8u16 vmini_hu(v8u16 _1) { return __builtin_lsx_vmini_hu(_1, 1); } + // CHECK-LABEL: @vmini_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vmini_wu(v4u32 _1) { return __builtin_lsx_vmini_wu(_1, 1); } + // CHECK-LABEL: @vmini_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> 
@llvm.loongarch.lsx.vmini.du(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vmini_du(v2u64 _1) { return __builtin_lsx_vmini_du(_1, 1); } + // CHECK-LABEL: @vseq_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vseq_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vseq_b(_1, _2); } + // CHECK-LABEL: @vseq_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vseq_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vseq_h(_1, _2); } + // CHECK-LABEL: @vseq_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vseq_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vseq_w(_1, _2); } + // CHECK-LABEL: @vseq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vseq_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vseq_d(_1, _2); } + // CHECK-LABEL: @vseqi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast 
<16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vseqi_b(v16i8 _1) { return __builtin_lsx_vseqi_b(_1, 1); } + // CHECK-LABEL: @vseqi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vseqi_h(v8i16 _1) { return __builtin_lsx_vseqi_h(_1, 1); } + // CHECK-LABEL: @vseqi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vseqi_w(v4i32 _1) { return __builtin_lsx_vseqi_w(_1, 1); } + // CHECK-LABEL: @vseqi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vseqi_d(v2i64 _1) { return __builtin_lsx_vseqi_d(_1, 1); } + // CHECK-LABEL: @vslti_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vslti_b(v16i8 _1) { return __builtin_lsx_vslti_b(_1, 1); } + // CHECK-LABEL: @vslt_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vslt_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vslt_b(_1, _2); } + // CHECK-LABEL: @vslt_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = 
bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vslt_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vslt_h(_1, _2); } + // CHECK-LABEL: @vslt_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vslt_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vslt_w(_1, _2); } + // CHECK-LABEL: @vslt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vslt_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vslt_d(_1, _2); } + // CHECK-LABEL: @vslti_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vslti_h(v8i16 _1) { return __builtin_lsx_vslti_h(_1, 1); } + // CHECK-LABEL: @vslti_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vslti_w(v4i32 _1) { return __builtin_lsx_vslti_w(_1, 1); } + // CHECK-LABEL: @vslti_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vslti_d(v2i64 _1) { return __builtin_lsx_vslti_d(_1, 1); } + // CHECK-LABEL: @vslt_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: 
[[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vslt_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vslt_bu(_1, _2); + } + // CHECK-LABEL: @vslt_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vslt_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vslt_hu(_1, _2); + } + // CHECK-LABEL: @vslt_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vslt_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vslt_wu(_1, _2); + } + // CHECK-LABEL: @vslt_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vslt_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vslt_du(_1, _2); + } + // CHECK-LABEL: @vslti_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vslti_bu(v16u8 _1) { return __builtin_lsx_vslti_bu(_1, 1); } + // CHECK-LABEL: @vslti_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vslti_hu(v8u16 _1) { return __builtin_lsx_vslti_hu(_1, 1); } + // CHECK-LABEL: @vslti_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> 
@llvm.loongarch.lsx.vslti.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vslti_wu(v4u32 _1) { return __builtin_lsx_vslti_wu(_1, 1); } + // CHECK-LABEL: @vslti_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vslti_du(v2u64 _1) { return __builtin_lsx_vslti_du(_1, 1); } + // CHECK-LABEL: @vsle_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsle_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsle_b(_1, _2); } + // CHECK-LABEL: @vsle_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsle_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsle_h(_1, _2); } + // CHECK-LABEL: @vsle_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsle_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsle_w(_1, _2); } + // CHECK-LABEL: @vsle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x 
i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsle_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsle_d(_1, _2); } + // CHECK-LABEL: @vslei_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vslei_b(v16i8 _1) { return __builtin_lsx_vslei_b(_1, 1); } + // CHECK-LABEL: @vslei_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vslei_h(v8i16 _1) { return __builtin_lsx_vslei_h(_1, 1); } + // CHECK-LABEL: @vslei_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vslei_w(v4i32 _1) { return __builtin_lsx_vslei_w(_1, 1); } + // CHECK-LABEL: @vslei_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vslei_d(v2i64 _1) { return __builtin_lsx_vslei_d(_1, 1); } + // CHECK-LABEL: @vsle_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsle_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vsle_bu(_1, _2); + } + // CHECK-LABEL: @vsle_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: 
[[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsle_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vsle_hu(_1, _2); + } + // CHECK-LABEL: @vsle_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsle_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vsle_wu(_1, _2); + } + // CHECK-LABEL: @vsle_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsle_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vsle_du(_1, _2); + } + // CHECK-LABEL: @vslei_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vslei_bu(v16u8 _1) { return __builtin_lsx_vslei_bu(_1, 1); } + // CHECK-LABEL: @vslei_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vslei_hu(v8u16 _1) { return __builtin_lsx_vslei_hu(_1, 1); } + // CHECK-LABEL: @vslei_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vslei_wu(v4u32 _1) { return __builtin_lsx_vslei_wu(_1, 1); } + // CHECK-LABEL: @vslei_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> 
@llvm.loongarch.lsx.vslei.du(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vslei_du(v2u64 _1) { return __builtin_lsx_vslei_du(_1, 1); } + // CHECK-LABEL: @vsat_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vsat_b(v16i8 _1) { return __builtin_lsx_vsat_b(_1, 1); } + // CHECK-LABEL: @vsat_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vsat_h(v8i16 _1) { return __builtin_lsx_vsat_h(_1, 1); } + // CHECK-LABEL: @vsat_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vsat_w(v4i32 _1) { return __builtin_lsx_vsat_w(_1, 1); } + // CHECK-LABEL: @vsat_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vsat_d(v2i64 _1) { return __builtin_lsx_vsat_d(_1, 1); } + // CHECK-LABEL: @vsat_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vsat_bu(v16u8 _1) { return __builtin_lsx_vsat_bu(_1, 1); } + // CHECK-LABEL: @vsat_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8u16 vsat_hu(v8u16 _1) { return __builtin_lsx_vsat_hu(_1, 
1); } + // CHECK-LABEL: @vsat_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vsat_wu(v4u32 _1) { return __builtin_lsx_vsat_wu(_1, 1); } + // CHECK-LABEL: @vsat_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vsat_du(v2u64 _1) { return __builtin_lsx_vsat_du(_1, 1); } + // CHECK-LABEL: @vadda_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vadda_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vadda_b(_1, _2); + } + // CHECK-LABEL: @vadda_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vadda_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vadda_h(_1, _2); + } + // CHECK-LABEL: @vadda_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vadda_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vadda_w(_1, _2); + } + // CHECK-LABEL: @vadda_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = 
tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vadda_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vadda_d(_1, _2); + } + // CHECK-LABEL: @vsadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsadd_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsadd_b(_1, _2); + } + // CHECK-LABEL: @vsadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsadd_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsadd_h(_1, _2); + } + // CHECK-LABEL: @vsadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsadd_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsadd_w(_1, _2); + } + // CHECK-LABEL: @vsadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsadd_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsadd_d(_1, _2); + } + // CHECK-LABEL: @vsadd_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// 
CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vsadd_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vsadd_bu(_1, _2); + } + // CHECK-LABEL: @vsadd_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vsadd_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vsadd_hu(_1, _2); + } + // CHECK-LABEL: @vsadd_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vsadd_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vsadd_wu(_1, _2); + } + // CHECK-LABEL: @vsadd_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vsadd_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vsadd_du(_1, _2); + } + // CHECK-LABEL: @vavg_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vavg_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vavg_b(_1, _2); } + // CHECK-LABEL: @vavg_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vavg_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vavg_h(_1, _2); } + // CHECK-LABEL: @vavg_w( + // CHECK-NEXT: entry: 
+-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vavg_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vavg_w(_1, _2); } + // CHECK-LABEL: @vavg_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vavg_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vavg_d(_1, _2); } + // CHECK-LABEL: @vavg_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vavg_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vavg_bu(_1, _2); + } + // CHECK-LABEL: @vavg_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vavg_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vavg_hu(_1, _2); + } + // CHECK-LABEL: @vavg_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vavg_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vavg_wu(_1, _2); + } + // CHECK-LABEL: @vavg_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: 
[[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vavg_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vavg_du(_1, _2); + } + // CHECK-LABEL: @vavgr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vavgr_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vavgr_b(_1, _2); + } + // CHECK-LABEL: @vavgr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vavgr_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vavgr_h(_1, _2); + } + // CHECK-LABEL: @vavgr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vavgr_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vavgr_w(_1, _2); + } + // CHECK-LABEL: @vavgr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vavgr_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vavgr_d(_1, _2); + } + // CHECK-LABEL: @vavgr_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = 
tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vavgr_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vavgr_bu(_1, _2); + } + // CHECK-LABEL: @vavgr_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vavgr_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vavgr_hu(_1, _2); + } + // CHECK-LABEL: @vavgr_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vavgr_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vavgr_wu(_1, _2); + } + // CHECK-LABEL: @vavgr_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vavgr_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vavgr_du(_1, _2); + } + // CHECK-LABEL: @vssub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssub_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vssub_b(_1, _2); + } + // CHECK-LABEL: @vssub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to 
i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssub_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssub_h(_1, _2); + } + // CHECK-LABEL: @vssub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssub_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssub_w(_1, _2); + } + // CHECK-LABEL: @vssub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vssub_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vssub_d(_1, _2); + } + // CHECK-LABEL: @vssub_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssub_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vssub_bu(_1, _2); + } + // CHECK-LABEL: @vssub_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssub_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vssub_hu(_1, _2); + } + // CHECK-LABEL: @vssub_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssub_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vssub_wu(_1, _2); + } + // CHECK-LABEL: 
@vssub_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vssub_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vssub_du(_1, _2); + } + // CHECK-LABEL: @vabsd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vabsd_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vabsd_b(_1, _2); + } + // CHECK-LABEL: @vabsd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vabsd_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vabsd_h(_1, _2); + } + // CHECK-LABEL: @vabsd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vabsd_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vabsd_w(_1, _2); + } + // CHECK-LABEL: @vabsd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vabsd_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vabsd_d(_1, _2); + } + // CHECK-LABEL: @vabsd_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// 
CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vabsd_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vabsd_bu(_1, _2); + } + // CHECK-LABEL: @vabsd_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vabsd_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vabsd_hu(_1, _2); + } + // CHECK-LABEL: @vabsd_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vabsd_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vabsd_wu(_1, _2); + } + // CHECK-LABEL: @vabsd_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vabsd_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vabsd_du(_1, _2); + } + // CHECK-LABEL: @vmul_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vmul_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmul_b(_1, _2); } + // CHECK-LABEL: @vmul_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 
[[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmul_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmul_h(_1, _2); } + // CHECK-LABEL: @vmul_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmul_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmul_w(_1, _2); } + // CHECK-LABEL: @vmul_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmul_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmul_d(_1, _2); } + // CHECK-LABEL: @vmadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v16i8 vmadd_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __builtin_lsx_vmadd_b(_1, _2, _3); + } + // CHECK-LABEL: @vmadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8i16 vmadd_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __builtin_lsx_vmadd_h(_1, _2, _3); + } + // CHECK-LABEL: @vmadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] 
= bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4i32 vmadd_w(v4i32 _1, v4i32 _2, v4i32 _3) { + return __builtin_lsx_vmadd_w(_1, _2, _3); + } + // CHECK-LABEL: @vmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmadd_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __builtin_lsx_vmadd_d(_1, _2, _3); + } + // CHECK-LABEL: @vmsub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v16i8 vmsub_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __builtin_lsx_vmsub_b(_1, _2, _3); + } + // CHECK-LABEL: @vmsub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8i16 vmsub_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __builtin_lsx_vmsub_h(_1, _2, _3); + } + // CHECK-LABEL: @vmsub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], 
<4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4i32 vmsub_w(v4i32 _1, v4i32 _2, v4i32 _3) { + return __builtin_lsx_vmsub_w(_1, _2, _3); + } + // CHECK-LABEL: @vmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmsub_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __builtin_lsx_vmsub_d(_1, _2, _3); + } + // CHECK-LABEL: @vdiv_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vdiv_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vdiv_b(_1, _2); } + // CHECK-LABEL: @vdiv_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vdiv_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vdiv_h(_1, _2); } + // CHECK-LABEL: @vdiv_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vdiv_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vdiv_w(_1, _2); } + // CHECK-LABEL: @vdiv_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = 
bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vdiv_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vdiv_d(_1, _2); } + // CHECK-LABEL: @vdiv_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vdiv_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vdiv_bu(_1, _2); + } + // CHECK-LABEL: @vdiv_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vdiv_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vdiv_hu(_1, _2); + } + // CHECK-LABEL: @vdiv_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vdiv_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vdiv_wu(_1, _2); + } + // CHECK-LABEL: @vdiv_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vdiv_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vdiv_du(_1, _2); + } + // CHECK-LABEL: @vhaddw_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vhaddw_h_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vhaddw_h_b(_1, _2); + } + 
// CHECK-LABEL: @vhaddw_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vhaddw_w_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vhaddw_w_h(_1, _2); + } + // CHECK-LABEL: @vhaddw_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vhaddw_d_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vhaddw_d_w(_1, _2); + } + // CHECK-LABEL: @vhaddw_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vhaddw_hu_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vhaddw_hu_bu(_1, _2); + } + // CHECK-LABEL: @vhaddw_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vhaddw_wu_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vhaddw_wu_hu(_1, _2); + } + // CHECK-LABEL: @vhaddw_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vhaddw_du_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vhaddw_du_wu(_1, _2); + } + // CHECK-LABEL: @vhsubw_h_b( + // CHECK-NEXT: entry: +-// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vhsubw_h_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vhsubw_h_b(_1, _2); + } + // CHECK-LABEL: @vhsubw_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vhsubw_w_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vhsubw_w_h(_1, _2); + } + // CHECK-LABEL: @vhsubw_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vhsubw_d_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vhsubw_d_w(_1, _2); + } + // CHECK-LABEL: @vhsubw_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vhsubw_hu_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vhsubw_hu_bu(_1, _2); + } + // CHECK-LABEL: @vhsubw_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vhsubw_wu_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vhsubw_wu_hu(_1, _2); + } + // CHECK-LABEL: @vhsubw_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> 
@llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vhsubw_du_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vhsubw_du_wu(_1, _2); + } + // CHECK-LABEL: @vmod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vmod_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmod_b(_1, _2); } + // CHECK-LABEL: @vmod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmod_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmod_h(_1, _2); } + // CHECK-LABEL: @vmod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmod_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmod_w(_1, _2); } + // CHECK-LABEL: @vmod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmod_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmod_d(_1, _2); } + // CHECK-LABEL: @vmod_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x 
i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vmod_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vmod_bu(_1, _2); + } + // CHECK-LABEL: @vmod_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vmod_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vmod_hu(_1, _2); + } + // CHECK-LABEL: @vmod_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vmod_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vmod_wu(_1, _2); + } + // CHECK-LABEL: @vmod_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vmod_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vmod_du(_1, _2); + } + // CHECK-LABEL: @vreplve_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> [[_1:%.*]], i32 [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> [[TMP0]], i32 [[_2:%.*]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vreplve_b(v16i8 _1, int _2) { + return __builtin_lsx_vreplve_b(_1, _2); + } + // CHECK-LABEL: @vreplve_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> [[_1:%.*]], i32 [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> [[TMP0]], i32 [[_2:%.*]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 
vreplve_h(v8i16 _1, int _2) { + return __builtin_lsx_vreplve_h(_1, _2); + } + // CHECK-LABEL: @vreplve_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> [[_1:%.*]], i32 [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> [[TMP0]], i32 [[_2:%.*]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vreplve_w(v4i32 _1, int _2) { + return __builtin_lsx_vreplve_w(_1, _2); + } + // CHECK-LABEL: @vreplve_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> [[_1:%.*]], i32 [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> [[TMP0]], i32 [[_2:%.*]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vreplve_d(v2i64 _1, int _2) { + return __builtin_lsx_vreplve_d(_1, _2); + } + // CHECK-LABEL: @vreplvei_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vreplvei_b(v16i8 _1) { return __builtin_lsx_vreplvei_b(_1, 1); } + // CHECK-LABEL: @vreplvei_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vreplvei_h(v8i16 _1) { return __builtin_lsx_vreplvei_h(_1, 1); } + // CHECK-LABEL: @vreplvei_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vreplvei_w(v4i32 _1) { return __builtin_lsx_vreplvei_w(_1, 1); } + // CHECK-LABEL: @vreplvei_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vreplvei_d(v2i64 _1) { return 
__builtin_lsx_vreplvei_d(_1, 1); } + // CHECK-LABEL: @vpickev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vpickev_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vpickev_b(_1, _2); + } + // CHECK-LABEL: @vpickev_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vpickev_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vpickev_h(_1, _2); + } + // CHECK-LABEL: @vpickev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vpickev_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vpickev_w(_1, _2); + } + // CHECK-LABEL: @vpickev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vpickev_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vpickev_d(_1, _2); + } + // CHECK-LABEL: @vpickod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vpickod_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vpickod_b(_1, _2); + } + // CHECK-LABEL: @vpickod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] 
= tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vpickod_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vpickod_h(_1, _2); + } + // CHECK-LABEL: @vpickod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vpickod_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vpickod_w(_1, _2); + } + // CHECK-LABEL: @vpickod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vpickod_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vpickod_d(_1, _2); + } + // CHECK-LABEL: @vilvh_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vilvh_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vilvh_b(_1, _2); + } + // CHECK-LABEL: @vilvh_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vilvh_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vilvh_h(_1, _2); + } + // CHECK-LABEL: @vilvh_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// 
CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vilvh_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vilvh_w(_1, _2); + } + // CHECK-LABEL: @vilvh_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vilvh_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vilvh_d(_1, _2); + } + // CHECK-LABEL: @vilvl_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vilvl_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vilvl_b(_1, _2); + } + // CHECK-LABEL: @vilvl_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vilvl_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vilvl_h(_1, _2); + } + // CHECK-LABEL: @vilvl_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vilvl_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vilvl_w(_1, _2); + } + // CHECK-LABEL: @vilvl_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: 
[[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vilvl_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vilvl_d(_1, _2); + } + // CHECK-LABEL: @vpackev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vpackev_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vpackev_b(_1, _2); + } + // CHECK-LABEL: @vpackev_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vpackev_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vpackev_h(_1, _2); + } + // CHECK-LABEL: @vpackev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vpackev_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vpackev_w(_1, _2); + } + // CHECK-LABEL: @vpackev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vpackev_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vpackev_d(_1, _2); + } + // CHECK-LABEL: @vpackod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: 
[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vpackod_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vpackod_b(_1, _2); + } + // CHECK-LABEL: @vpackod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vpackod_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vpackod_h(_1, _2); + } + // CHECK-LABEL: @vpackod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vpackod_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vpackod_w(_1, _2); + } + // CHECK-LABEL: @vpackod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vpackod_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vpackod_d(_1, _2); + } + // CHECK-LABEL: @vshuf_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8i16 vshuf_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __builtin_lsx_vshuf_h(_1, _2, _3); + } + // CHECK-LABEL: @vshuf_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call 
<4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4i32 vshuf_w(v4i32 _1, v4i32 _2, v4i32 _3) { + return __builtin_lsx_vshuf_w(_1, _2, _3); + } + // CHECK-LABEL: @vshuf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vshuf_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __builtin_lsx_vshuf_d(_1, _2, _3); + } + // CHECK-LABEL: @vand_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vand_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vand_v(_1, _2); } + // CHECK-LABEL: @vandi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vandi_b(v16u8 _1) { return __builtin_lsx_vandi_b(_1, 1); } + // CHECK-LABEL: @vor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vor_v(_1, _2); } + // CHECK-LABEL: @vori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vori_b(v16u8 _1) { return __builtin_lsx_vori_b(_1, 1); } + 
// CHECK-LABEL: @vnor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vnor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vnor_v(_1, _2); } + // CHECK-LABEL: @vnori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vnori_b(v16u8 _1) { return __builtin_lsx_vnori_b(_1, 1); } + // CHECK-LABEL: @vxor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vxor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vxor_v(_1, _2); } + // CHECK-LABEL: @vxori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vxori_b(v16u8 _1) { return __builtin_lsx_vxori_b(_1, 1); } + // CHECK-LABEL: @vbitsel_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v16u8 vbitsel_v(v16u8 _1, v16u8 _2, v16u8 _3) { + return __builtin_lsx_vbitsel_v(_1, _2, _3); + } + // CHECK-LABEL: @vbitseli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 
[[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vbitseli_b(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vbitseli_b(_1, _2, 1); + } + // CHECK-LABEL: @vshuf4i_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vshuf4i_b(v16i8 _1) { return __builtin_lsx_vshuf4i_b(_1, 1); } + // CHECK-LABEL: @vshuf4i_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vshuf4i_h(v8i16 _1) { return __builtin_lsx_vshuf4i_h(_1, 1); } + // CHECK-LABEL: @vshuf4i_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vshuf4i_w(v4i32 _1) { return __builtin_lsx_vshuf4i_w(_1, 1); } + // CHECK-LABEL: @vreplgr2vr_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v16i8 vreplgr2vr_b(int _1) { return __builtin_lsx_vreplgr2vr_b(_1); } + // CHECK-LABEL: @vreplgr2vr_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v8i16 vreplgr2vr_h(int _1) { return __builtin_lsx_vreplgr2vr_h(_1); } + // CHECK-LABEL: @vreplgr2vr_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v4i32 vreplgr2vr_w(int _1) { return __builtin_lsx_vreplgr2vr_w(_1); } + // CHECK-LABEL: @vreplgr2vr_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 ++// CHECK-NEXT: 
ret i128 [[TMP1]] + // + v2i64 vreplgr2vr_d(long _1) { return __builtin_lsx_vreplgr2vr_d(_1); } + // CHECK-LABEL: @vpcnt_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vpcnt_b(v16i8 _1) { return __builtin_lsx_vpcnt_b(_1); } + // CHECK-LABEL: @vpcnt_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vpcnt_h(v8i16 _1) { return __builtin_lsx_vpcnt_h(_1); } + // CHECK-LABEL: @vpcnt_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vpcnt_w(v4i32 _1) { return __builtin_lsx_vpcnt_w(_1); } + // CHECK-LABEL: @vpcnt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vpcnt_d(v2i64 _1) { return __builtin_lsx_vpcnt_d(_1); } + // CHECK-LABEL: @vclo_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vclo_b(v16i8 _1) { return __builtin_lsx_vclo_b(_1); } + // CHECK-LABEL: @vclo_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vclo_h(v8i16 _1) { return __builtin_lsx_vclo_h(_1); } + // CHECK-LABEL: @vclo_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] 
= bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vclo_w(v4i32 _1) { return __builtin_lsx_vclo_w(_1); } + // CHECK-LABEL: @vclo_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vclo_d(v2i64 _1) { return __builtin_lsx_vclo_d(_1); } + // CHECK-LABEL: @vclz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vclz_b(v16i8 _1) { return __builtin_lsx_vclz_b(_1); } + // CHECK-LABEL: @vclz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vclz_h(v8i16 _1) { return __builtin_lsx_vclz_h(_1); } + // CHECK-LABEL: @vclz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vclz_w(v4i32 _1) { return __builtin_lsx_vclz_w(_1); } + // CHECK-LABEL: @vclz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vclz_d(v2i64 _1) { return __builtin_lsx_vclz_d(_1); } + // CHECK-LABEL: @vpickve2gr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int vpickve2gr_b(v16i8 _1) { return __builtin_lsx_vpickve2gr_b(_1, 1); } + // CHECK-LABEL: @vpickve2gr_h( + // CHECK-NEXT: entry: +-// 
CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int vpickve2gr_h(v8i16 _1) { return __builtin_lsx_vpickve2gr_h(_1, 1); } + // CHECK-LABEL: @vpickve2gr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int vpickve2gr_w(v4i32 _1) { return __builtin_lsx_vpickve2gr_w(_1, 1); } + // CHECK-LABEL: @vpickve2gr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i64 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: ret i64 [[TMP1]] + // + long vpickve2gr_d(v2i64 _1) { return __builtin_lsx_vpickve2gr_d(_1, 1); } + // CHECK-LABEL: @vpickve2gr_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + unsigned int vpickve2gr_bu(v16i8 _1) { + return __builtin_lsx_vpickve2gr_bu(_1, 1); + } + // CHECK-LABEL: @vpickve2gr_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + unsigned int vpickve2gr_hu(v8i16 _1) { + return __builtin_lsx_vpickve2gr_hu(_1, 1); + } + // CHECK-LABEL: @vpickve2gr_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + unsigned int vpickve2gr_wu(v4i32 _1) { + return __builtin_lsx_vpickve2gr_wu(_1, 1); + } + // CHECK-LABEL: @vpickve2gr_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i64 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: ret i64 [[TMP1]] + // + unsigned long int vpickve2gr_du(v2i64 _1) { + return __builtin_lsx_vpickve2gr_du(_1, 1); + } + // CHECK-LABEL: @vinsgr2vr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> [[_1:%.*]], i32 1, i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> [[TMP0]], i32 1, i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vinsgr2vr_b(v16i8 _1) { + return __builtin_lsx_vinsgr2vr_b(_1, 1, 1); + } + // CHECK-LABEL: @vinsgr2vr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> [[_1:%.*]], i32 1, i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> [[TMP0]], i32 1, i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vinsgr2vr_h(v8i16 _1) { + return __builtin_lsx_vinsgr2vr_h(_1, 1, 1); + } + // CHECK-LABEL: @vinsgr2vr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> [[_1:%.*]], i32 1, i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> [[TMP0]], i32 1, i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vinsgr2vr_w(v4i32 _1) { + return __builtin_lsx_vinsgr2vr_w(_1, 1, 1); + } + // CHECK-LABEL: @vinsgr2vr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> [[_1:%.*]], i64 1, i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> [[TMP0]], i64 1, i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vinsgr2vr_d(v2i64 _1) { + return __builtin_lsx_vinsgr2vr_d(_1, 1, 1); + } + // CHECK-LABEL: @vfadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfadd_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfadd_s(_1, _2); + } + // CHECK-LABEL: @vfadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: 
[[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2f64 vfadd_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfadd_d(_1, _2); + } + // CHECK-LABEL: @vfsub_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfsub_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfsub_s(_1, _2); + } + // CHECK-LABEL: @vfsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2f64 vfsub_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfsub_d(_1, _2); + } + // CHECK-LABEL: @vfmul_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfmul_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfmul_s(_1, _2); + } + // CHECK-LABEL: @vfmul_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2f64 vfmul_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfmul_d(_1, _2); + } + // CHECK-LABEL: @vfdiv_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to 
i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfdiv_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfdiv_s(_1, _2); + } + // CHECK-LABEL: @vfdiv_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2f64 vfdiv_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfdiv_d(_1, _2); + } + // CHECK-LABEL: @vfcvt_h_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vfcvt_h_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcvt_h_s(_1, _2); + } + // CHECK-LABEL: @vfcvt_s_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfcvt_s_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcvt_s_d(_1, _2); + } + // CHECK-LABEL: @vfmin_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfmin_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfmin_s(_1, _2); + } + // CHECK-LABEL: @vfmin_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 
[[TMP3]] + // + v2f64 vfmin_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfmin_d(_1, _2); + } + // CHECK-LABEL: @vfmina_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfmina_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfmina_s(_1, _2); + } + // CHECK-LABEL: @vfmina_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2f64 vfmina_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfmina_d(_1, _2); + } + // CHECK-LABEL: @vfmax_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfmax_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfmax_s(_1, _2); + } + // CHECK-LABEL: @vfmax_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2f64 vfmax_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfmax_d(_1, _2); + } + // CHECK-LABEL: @vfmaxa_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfmaxa_s(v4f32 
_1, v4f32 _2) { + return __builtin_lsx_vfmaxa_s(_1, _2); + } + // CHECK-LABEL: @vfmaxa_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2f64 vfmaxa_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfmaxa_d(_1, _2); + } + // CHECK-LABEL: @vfclass_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vfclass_s(v4f32 _1) { return __builtin_lsx_vfclass_s(_1); } + // CHECK-LABEL: @vfclass_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vfclass_d(v2f64 _1) { return __builtin_lsx_vfclass_d(_1); } + // CHECK-LABEL: @vfsqrt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vfsqrt_s(v4f32 _1) { return __builtin_lsx_vfsqrt_s(_1); } + // CHECK-LABEL: @vfsqrt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vfsqrt_d(v2f64 _1) { return __builtin_lsx_vfsqrt_d(_1); } + // CHECK-LABEL: @vfrecip_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vfrecip_s(v4f32 _1) { return 
__builtin_lsx_vfrecip_s(_1); } + // CHECK-LABEL: @vfrecip_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vfrecip_d(v2f64 _1) { return __builtin_lsx_vfrecip_d(_1); } + // CHECK-LABEL: @vfrint_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vfrint_s(v4f32 _1) { return __builtin_lsx_vfrint_s(_1); } + // CHECK-LABEL: @vfrint_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vfrint_d(v2f64 _1) { return __builtin_lsx_vfrint_d(_1); } + // CHECK-LABEL: @vfrsqrt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vfrsqrt_s(v4f32 _1) { return __builtin_lsx_vfrsqrt_s(_1); } + // CHECK-LABEL: @vfrsqrt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vfrsqrt_d(v2f64 _1) { return __builtin_lsx_vfrsqrt_d(_1); } + // CHECK-LABEL: @vflogb_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vflogb_s(v4f32 _1) { return __builtin_lsx_vflogb_s(_1); } + // CHECK-LABEL: @vflogb_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> 
@llvm.loongarch.lsx.vflogb.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vflogb_d(v2f64 _1) { return __builtin_lsx_vflogb_d(_1); } + // CHECK-LABEL: @vfcvth_s_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vfcvth_s_h(v8i16 _1) { return __builtin_lsx_vfcvth_s_h(_1); } + // CHECK-LABEL: @vfcvth_d_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vfcvth_d_s(v4f32 _1) { return __builtin_lsx_vfcvth_d_s(_1); } + // CHECK-LABEL: @vfcvtl_s_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vfcvtl_s_h(v8i16 _1) { return __builtin_lsx_vfcvtl_s_h(_1); } + // CHECK-LABEL: @vfcvtl_d_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vfcvtl_d_s(v4f32 _1) { return __builtin_lsx_vfcvtl_d_s(_1); } + // CHECK-LABEL: @vftint_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vftint_w_s(v4f32 _1) { return __builtin_lsx_vftint_w_s(_1); } + // CHECK-LABEL: @vftint_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 
[[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftint_l_d(v2f64 _1) { return __builtin_lsx_vftint_l_d(_1); } + // CHECK-LABEL: @vftint_wu_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vftint_wu_s(v4f32 _1) { return __builtin_lsx_vftint_wu_s(_1); } + // CHECK-LABEL: @vftint_lu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vftint_lu_d(v2f64 _1) { return __builtin_lsx_vftint_lu_d(_1); } + // CHECK-LABEL: @vftintrz_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vftintrz_w_s(v4f32 _1) { return __builtin_lsx_vftintrz_w_s(_1); } + // CHECK-LABEL: @vftintrz_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrz_l_d(v2f64 _1) { return __builtin_lsx_vftintrz_l_d(_1); } + // CHECK-LABEL: @vftintrz_wu_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vftintrz_wu_s(v4f32 _1) { return __builtin_lsx_vftintrz_wu_s(_1); } + // CHECK-LABEL: @vftintrz_lu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 
x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vftintrz_lu_d(v2f64 _1) { return __builtin_lsx_vftintrz_lu_d(_1); } + // CHECK-LABEL: @vffint_s_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vffint_s_w(v4i32 _1) { return __builtin_lsx_vffint_s_w(_1); } + // CHECK-LABEL: @vffint_d_l( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vffint_d_l(v2i64 _1) { return __builtin_lsx_vffint_d_l(_1); } + // CHECK-LABEL: @vffint_s_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vffint_s_wu(v4u32 _1) { return __builtin_lsx_vffint_s_wu(_1); } + // CHECK-LABEL: @vffint_d_lu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vffint_d_lu(v2u64 _1) { return __builtin_lsx_vffint_d_lu(_1); } + // CHECK-LABEL: @vandn_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vandn_v(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vandn_v(_1, _2); + } + // CHECK-LABEL: @vneg_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = 
bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vneg_b(v16i8 _1) { return __builtin_lsx_vneg_b(_1); } + // CHECK-LABEL: @vneg_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vneg_h(v8i16 _1) { return __builtin_lsx_vneg_h(_1); } + // CHECK-LABEL: @vneg_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vneg_w(v4i32 _1) { return __builtin_lsx_vneg_w(_1); } + // CHECK-LABEL: @vneg_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vneg_d(v2i64 _1) { return __builtin_lsx_vneg_d(_1); } + // CHECK-LABEL: @vmuh_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vmuh_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmuh_b(_1, _2); } + // CHECK-LABEL: @vmuh_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmuh_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmuh_h(_1, _2); } + // CHECK-LABEL: @vmuh_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> [[TMP0]], <4 x 
i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmuh_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmuh_w(_1, _2); } + // CHECK-LABEL: @vmuh_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmuh_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmuh_d(_1, _2); } + // CHECK-LABEL: @vmuh_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vmuh_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vmuh_bu(_1, _2); + } + // CHECK-LABEL: @vmuh_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vmuh_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vmuh_hu(_1, _2); + } + // CHECK-LABEL: @vmuh_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vmuh_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vmuh_wu(_1, _2); + } + // CHECK-LABEL: @vmuh_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vmuh_du(v2u64 _1, v2u64 _2) { + return 
__builtin_lsx_vmuh_du(_1, _2); + } + // CHECK-LABEL: @vsllwil_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vsllwil_h_b(v16i8 _1) { return __builtin_lsx_vsllwil_h_b(_1, 1); } + // CHECK-LABEL: @vsllwil_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vsllwil_w_h(v8i16 _1) { return __builtin_lsx_vsllwil_w_h(_1, 1); } + // CHECK-LABEL: @vsllwil_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vsllwil_d_w(v4i32 _1) { return __builtin_lsx_vsllwil_d_w(_1, 1); } + // CHECK-LABEL: @vsllwil_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8u16 vsllwil_hu_bu(v16u8 _1) { + return __builtin_lsx_vsllwil_hu_bu(_1, 1); + } + // CHECK-LABEL: @vsllwil_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vsllwil_wu_hu(v8u16 _1) { + return __builtin_lsx_vsllwil_wu_hu(_1, 1); + } + // CHECK-LABEL: @vsllwil_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vsllwil_du_wu(v4u32 _1) { + return __builtin_lsx_vsllwil_du_wu(_1, 1); + } + // 
CHECK-LABEL: @vsran_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsran_b_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsran_b_h(_1, _2); + } + // CHECK-LABEL: @vsran_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsran_h_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsran_h_w(_1, _2); + } + // CHECK-LABEL: @vsran_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsran_w_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsran_w_d(_1, _2); + } + // CHECK-LABEL: @vssran_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssran_b_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssran_b_h(_1, _2); + } + // CHECK-LABEL: @vssran_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssran_h_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssran_h_w(_1, _2); + } + // CHECK-LABEL: @vssran_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> 
@llvm.loongarch.lsx.vssran.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssran_w_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vssran_w_d(_1, _2); + } + // CHECK-LABEL: @vssran_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssran_bu_h(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vssran_bu_h(_1, _2); + } + // CHECK-LABEL: @vssran_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssran_hu_w(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vssran_hu_w(_1, _2); + } + // CHECK-LABEL: @vssran_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssran_wu_d(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vssran_wu_d(_1, _2); + } + // CHECK-LABEL: @vsrarn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrarn_b_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrarn_b_h(_1, _2); + } + // CHECK-LABEL: @vsrarn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// 
CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrarn_h_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsrarn_h_w(_1, _2); + } + // CHECK-LABEL: @vsrarn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrarn_w_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsrarn_w_d(_1, _2); + } + // CHECK-LABEL: @vssrarn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssrarn_b_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssrarn_b_h(_1, _2); + } + // CHECK-LABEL: @vssrarn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssrarn_h_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssrarn_h_w(_1, _2); + } + // CHECK-LABEL: @vssrarn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssrarn_w_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vssrarn_w_d(_1, _2); + } + // CHECK-LABEL: @vssrarn_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast 
i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssrarn_bu_h(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vssrarn_bu_h(_1, _2); + } + // CHECK-LABEL: @vssrarn_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssrarn_hu_w(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vssrarn_hu_w(_1, _2); + } + // CHECK-LABEL: @vssrarn_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssrarn_wu_d(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vssrarn_wu_d(_1, _2); + } + // CHECK-LABEL: @vsrln_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrln_b_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrln_b_h(_1, _2); + } + // CHECK-LABEL: @vsrln_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrln_h_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsrln_h_w(_1, _2); + } + // CHECK-LABEL: @vsrln_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 
[[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrln_w_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsrln_w_d(_1, _2); + } + // CHECK-LABEL: @vssrln_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssrln_bu_h(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vssrln_bu_h(_1, _2); + } + // CHECK-LABEL: @vssrln_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssrln_hu_w(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vssrln_hu_w(_1, _2); + } + // CHECK-LABEL: @vssrln_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssrln_wu_d(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vssrln_wu_d(_1, _2); + } + // CHECK-LABEL: @vsrlrn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrlrn_b_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrlrn_b_h(_1, _2); + } + // CHECK-LABEL: @vsrlrn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> 
@llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrlrn_h_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsrlrn_h_w(_1, _2); + } + // CHECK-LABEL: @vsrlrn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrlrn_w_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsrlrn_w_d(_1, _2); + } + // CHECK-LABEL: @vssrlrn_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssrlrn_bu_h(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vssrlrn_bu_h(_1, _2); + } + // CHECK-LABEL: @vssrlrn_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssrlrn_hu_w(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vssrlrn_hu_w(_1, _2); + } + // CHECK-LABEL: @vssrlrn_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssrlrn_wu_d(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vssrlrn_wu_d(_1, _2); + } + // CHECK-LABEL: @vfrstpi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> [[TMP0]], <16 x i8> 
[[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vfrstpi_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vfrstpi_b(_1, _2, 1); + } + // CHECK-LABEL: @vfrstpi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vfrstpi_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vfrstpi_h(_1, _2, 1); + } + // CHECK-LABEL: @vfrstp_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v16i8 vfrstp_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __builtin_lsx_vfrstp_b(_1, _2, _3); + } + // CHECK-LABEL: @vfrstp_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8i16 vfrstp_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __builtin_lsx_vfrstp_h(_1, _2, _3); + } + // CHECK-LABEL: @vshuf4i_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vshuf4i_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vshuf4i_d(_1, _2, 1); + } + // CHECK-LABEL: @vbsrl_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: 
[[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vbsrl_v(v16i8 _1) { return __builtin_lsx_vbsrl_v(_1, 1); } + // CHECK-LABEL: @vbsll_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vbsll_v(v16i8 _1) { return __builtin_lsx_vbsll_v(_1, 1); } + // CHECK-LABEL: @vextrins_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vextrins_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vextrins_b(_1, _2, 1); + } + // CHECK-LABEL: @vextrins_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vextrins_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vextrins_h(_1, _2, 1); + } + // CHECK-LABEL: @vextrins_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vextrins_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vextrins_w(_1, _2, 1); + } + // CHECK-LABEL: @vextrins_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 
[[TMP3]] + // + v2i64 vextrins_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vextrins_d(_1, _2, 1); + } + // CHECK-LABEL: @vmskltz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vmskltz_b(v16i8 _1) { return __builtin_lsx_vmskltz_b(_1); } + // CHECK-LABEL: @vmskltz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vmskltz_h(v8i16 _1) { return __builtin_lsx_vmskltz_h(_1); } + // CHECK-LABEL: @vmskltz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vmskltz_w(v4i32 _1) { return __builtin_lsx_vmskltz_w(_1); } + // CHECK-LABEL: @vmskltz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vmskltz_d(v2i64 _1) { return __builtin_lsx_vmskltz_d(_1); } + // CHECK-LABEL: @vsigncov_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsigncov_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsigncov_b(_1, _2); + } + // CHECK-LABEL: @vsigncov_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> 
[[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsigncov_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsigncov_h(_1, _2); + } + // CHECK-LABEL: @vsigncov_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsigncov_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsigncov_w(_1, _2); + } + // CHECK-LABEL: @vsigncov_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsigncov_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsigncov_d(_1, _2); + } + // CHECK-LABEL: @vfmadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4f32 vfmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { + return __builtin_lsx_vfmadd_s(_1, _2, _3); + } + // CHECK-LABEL: @vfmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2f64 vfmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { + return __builtin_lsx_vfmadd_d(_1, _2, _3); + } + // CHECK-LABEL: @vfmsub_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// 
CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4f32 vfmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { + return __builtin_lsx_vfmsub_s(_1, _2, _3); + } + // CHECK-LABEL: @vfmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2f64 vfmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { + return __builtin_lsx_vfmsub_d(_1, _2, _3); + } + // CHECK-LABEL: @vfnmadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4f32 vfnmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { + return __builtin_lsx_vfnmadd_s(_1, _2, _3); + } + // CHECK-LABEL: @vfnmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2f64 vfnmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { + return __builtin_lsx_vfnmadd_d(_1, _2, _3); + } + // CHECK-LABEL: @vfnmsub_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> ++// 
CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4f32 vfnmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { + return __builtin_lsx_vfnmsub_s(_1, _2, _3); + } + // CHECK-LABEL: @vfnmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2f64 vfnmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { + return __builtin_lsx_vfnmsub_d(_1, _2, _3); + } + // CHECK-LABEL: @vftintrne_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vftintrne_w_s(v4f32 _1) { return __builtin_lsx_vftintrne_w_s(_1); } + // CHECK-LABEL: @vftintrne_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrne_l_d(v2f64 _1) { return __builtin_lsx_vftintrne_l_d(_1); } + // CHECK-LABEL: @vftintrp_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vftintrp_w_s(v4f32 _1) { return __builtin_lsx_vftintrp_w_s(_1); } + // CHECK-LABEL: @vftintrp_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrp_l_d(v2f64 _1) { return __builtin_lsx_vftintrp_l_d(_1); } + // CHECK-LABEL: @vftintrm_w_s( + // 
CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vftintrm_w_s(v4f32 _1) { return __builtin_lsx_vftintrm_w_s(_1); } + // CHECK-LABEL: @vftintrm_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrm_l_d(v2f64 _1) { return __builtin_lsx_vftintrm_l_d(_1); } + // CHECK-LABEL: @vftint_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vftint_w_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vftint_w_d(_1, _2); + } + // CHECK-LABEL: @vffint_s_l( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vffint_s_l(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vffint_s_l(_1, _2); + } + // CHECK-LABEL: @vftintrz_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vftintrz_w_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vftintrz_w_d(_1, _2); + } + // CHECK-LABEL: @vftintrp_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// 
CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vftintrp_w_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vftintrp_w_d(_1, _2); + } + // CHECK-LABEL: @vftintrm_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vftintrm_w_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vftintrm_w_d(_1, _2); + } + // CHECK-LABEL: @vftintrne_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vftintrne_w_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vftintrne_w_d(_1, _2); + } + // CHECK-LABEL: @vftintl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintl_l_s(v4f32 _1) { return __builtin_lsx_vftintl_l_s(_1); } + // CHECK-LABEL: @vftinth_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftinth_l_s(v4f32 _1) { return __builtin_lsx_vftinth_l_s(_1); } + // CHECK-LABEL: @vffinth_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vffinth_d_w(v4i32 _1) { return __builtin_lsx_vffinth_d_w(_1); 
} + // CHECK-LABEL: @vffintl_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vffintl_d_w(v4i32 _1) { return __builtin_lsx_vffintl_d_w(_1); } + // CHECK-LABEL: @vftintrzl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrzl_l_s(v4f32 _1) { return __builtin_lsx_vftintrzl_l_s(_1); } + // CHECK-LABEL: @vftintrzh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrzh_l_s(v4f32 _1) { return __builtin_lsx_vftintrzh_l_s(_1); } + // CHECK-LABEL: @vftintrpl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrpl_l_s(v4f32 _1) { return __builtin_lsx_vftintrpl_l_s(_1); } + // CHECK-LABEL: @vftintrph_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrph_l_s(v4f32 _1) { return __builtin_lsx_vftintrph_l_s(_1); } + // CHECK-LABEL: @vftintrml_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrml_l_s(v4f32 _1) { return __builtin_lsx_vftintrml_l_s(_1); } + // CHECK-LABEL: @vftintrmh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail 
call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrmh_l_s(v4f32 _1) { return __builtin_lsx_vftintrmh_l_s(_1); } + // CHECK-LABEL: @vftintrnel_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrnel_l_s(v4f32 _1) { + return __builtin_lsx_vftintrnel_l_s(_1); + } + // CHECK-LABEL: @vftintrneh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrneh_l_s(v4f32 _1) { + return __builtin_lsx_vftintrneh_l_s(_1); + } + // CHECK-LABEL: @vfrintrne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> +-// CHECK-NEXT: ret <4 x i32> [[TMP1]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vfrintrne_s(v4f32 _1) { return __builtin_lsx_vfrintrne_s(_1); } + // CHECK-LABEL: @vfrintrne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> +-// CHECK-NEXT: ret <2 x i64> [[TMP1]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vfrintrne_d(v2f64 _1) { return __builtin_lsx_vfrintrne_d(_1); } + // CHECK-LABEL: @vfrintrz_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> +-// CHECK-NEXT: ret <4 x i32> [[TMP1]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vfrintrz_s(v4f32 
_1) { return __builtin_lsx_vfrintrz_s(_1); } + // CHECK-LABEL: @vfrintrz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> +-// CHECK-NEXT: ret <2 x i64> [[TMP1]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vfrintrz_d(v2f64 _1) { return __builtin_lsx_vfrintrz_d(_1); } + // CHECK-LABEL: @vfrintrp_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> +-// CHECK-NEXT: ret <4 x i32> [[TMP1]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vfrintrp_s(v4f32 _1) { return __builtin_lsx_vfrintrp_s(_1); } + // CHECK-LABEL: @vfrintrp_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> +-// CHECK-NEXT: ret <2 x i64> [[TMP1]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vfrintrp_d(v2f64 _1) { return __builtin_lsx_vfrintrp_d(_1); } + // CHECK-LABEL: @vfrintrm_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> +-// CHECK-NEXT: ret <4 x i32> [[TMP1]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vfrintrm_s(v4f32 _1) { return __builtin_lsx_vfrintrm_s(_1); } + // CHECK-LABEL: @vfrintrm_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> +-// CHECK-NEXT: ret <2 x i64> [[TMP1]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vfrintrm_d(v2f64 _1) { return __builtin_lsx_vfrintrm_d(_1); } + // CHECK-LABEL: @vstelm_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1, i32 1) ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: tail call void 
@llvm.loongarch.lsx.vstelm.b(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i32 1, i32 1) + // CHECK-NEXT: ret void + // + void vstelm_b(v16i8 _1, void *_2) { +@@ -3349,7 +4577,8 @@ void vstelm_b(v16i8 _1, void *_2) { + } + // CHECK-LABEL: @vstelm_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> [[_1:%.*]], ptr [[_2:%.*]], i32 2, i32 1) ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> [[TMP0]], ptr [[_2:%.*]], i32 2, i32 1) + // CHECK-NEXT: ret void + // + void vstelm_h(v8i16 _1, void *_2) { +@@ -3357,7 +4586,8 @@ void vstelm_h(v8i16 _1, void *_2) { + } + // CHECK-LABEL: @vstelm_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> [[_1:%.*]], ptr [[_2:%.*]], i32 4, i32 1) ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> [[TMP0]], ptr [[_2:%.*]], i32 4, i32 1) + // CHECK-NEXT: ret void + // + void vstelm_w(v4i32 _1, void *_2) { +@@ -3365,7 +4595,8 @@ void vstelm_w(v4i32 _1, void *_2) { + } + // CHECK-LABEL: @vstelm_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> [[_1:%.*]], ptr [[_2:%.*]], i32 8, i32 1) ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> [[TMP0]], ptr [[_2:%.*]], i32 8, i32 1) + // CHECK-NEXT: ret void + // + void vstelm_d(v2i64 _1, void *_2) { +@@ -3373,1286 +4604,1785 @@ void vstelm_d(v2i64 _1, void *_2) { + } + // CHECK-LABEL: @vaddwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwev_d_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vaddwev_d_w(_1, _2); + } + // CHECK-LABEL: @vaddwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vaddwev_w_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vaddwev_w_h(_1, _2); + } + // CHECK-LABEL: @vaddwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> [[TMP0]], <16 x 
i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vaddwev_h_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vaddwev_h_b(_1, _2); + } + // CHECK-LABEL: @vaddwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwod_d_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vaddwod_d_w(_1, _2); + } + // CHECK-LABEL: @vaddwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vaddwod_w_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vaddwod_w_h(_1, _2); + } + // CHECK-LABEL: @vaddwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vaddwod_h_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vaddwod_h_b(_1, _2); + } + // CHECK-LABEL: @vaddwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwev_d_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vaddwev_d_wu(_1, _2); + } + // CHECK-LABEL: @vaddwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x 
i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vaddwev_w_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vaddwev_w_hu(_1, _2); + } + // CHECK-LABEL: @vaddwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vaddwev_h_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vaddwev_h_bu(_1, _2); + } + // CHECK-LABEL: @vaddwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwod_d_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vaddwod_d_wu(_1, _2); + } + // CHECK-LABEL: @vaddwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vaddwod_w_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vaddwod_w_hu(_1, _2); + } + // CHECK-LABEL: @vaddwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vaddwod_h_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vaddwod_h_bu(_1, _2); + } + // CHECK-LABEL: @vaddwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: 
ret i128 [[TMP3]] + // + v2i64 vaddwev_d_wu_w(v4u32 _1, v4i32 _2) { + return __builtin_lsx_vaddwev_d_wu_w(_1, _2); + } + // CHECK-LABEL: @vaddwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vaddwev_w_hu_h(v8u16 _1, v8i16 _2) { + return __builtin_lsx_vaddwev_w_hu_h(_1, _2); + } + // CHECK-LABEL: @vaddwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vaddwev_h_bu_b(v16u8 _1, v16i8 _2) { + return __builtin_lsx_vaddwev_h_bu_b(_1, _2); + } + // CHECK-LABEL: @vaddwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwod_d_wu_w(v4u32 _1, v4i32 _2) { + return __builtin_lsx_vaddwod_d_wu_w(_1, _2); + } + // CHECK-LABEL: @vaddwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vaddwod_w_hu_h(v8u16 _1, v8i16 _2) { + return __builtin_lsx_vaddwod_w_hu_h(_1, _2); + } + // CHECK-LABEL: @vaddwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// 
CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vaddwod_h_bu_b(v16u8 _1, v16i8 _2) { + return __builtin_lsx_vaddwod_h_bu_b(_1, _2); + } + // CHECK-LABEL: @vsubwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsubwev_d_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsubwev_d_w(_1, _2); + } + // CHECK-LABEL: @vsubwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsubwev_w_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsubwev_w_h(_1, _2); + } + // CHECK-LABEL: @vsubwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsubwev_h_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsubwev_h_b(_1, _2); + } + // CHECK-LABEL: @vsubwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsubwod_d_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsubwod_d_w(_1, _2); + } + // CHECK-LABEL: @vsubwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsubwod_w_h(v8i16 
_1, v8i16 _2) { + return __builtin_lsx_vsubwod_w_h(_1, _2); + } + // CHECK-LABEL: @vsubwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsubwod_h_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsubwod_h_b(_1, _2); + } + // CHECK-LABEL: @vsubwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsubwev_d_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vsubwev_d_wu(_1, _2); + } + // CHECK-LABEL: @vsubwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsubwev_w_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vsubwev_w_hu(_1, _2); + } + // CHECK-LABEL: @vsubwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsubwev_h_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vsubwev_h_bu(_1, _2); + } + // CHECK-LABEL: @vsubwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsubwod_d_wu(v4u32 _1, v4u32 _2) { + return 
__builtin_lsx_vsubwod_d_wu(_1, _2); + } + // CHECK-LABEL: @vsubwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsubwod_w_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vsubwod_w_hu(_1, _2); + } + // CHECK-LABEL: @vsubwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsubwod_h_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vsubwod_h_bu(_1, _2); + } + // CHECK-LABEL: @vaddwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwev_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vaddwev_q_d(_1, _2); + } + // CHECK-LABEL: @vaddwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwod_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vaddwod_q_d(_1, _2); + } + // CHECK-LABEL: @vaddwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwev_q_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vaddwev_q_du(_1, _2); + } + // 
CHECK-LABEL: @vaddwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwod_q_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vaddwod_q_du(_1, _2); + } + // CHECK-LABEL: @vsubwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsubwev_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsubwev_q_d(_1, _2); + } + // CHECK-LABEL: @vsubwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsubwod_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsubwod_q_d(_1, _2); + } + // CHECK-LABEL: @vsubwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsubwev_q_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vsubwev_q_du(_1, _2); + } + // CHECK-LABEL: @vsubwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsubwod_q_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vsubwod_q_du(_1, _2); + } + // CHECK-LABEL: @vaddwev_q_du_d( + // CHECK-NEXT: entry: 
+-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwev_q_du_d(v2u64 _1, v2i64 _2) { + return __builtin_lsx_vaddwev_q_du_d(_1, _2); + } + // CHECK-LABEL: @vaddwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwod_q_du_d(v2u64 _1, v2i64 _2) { + return __builtin_lsx_vaddwod_q_du_d(_1, _2); + } + // CHECK-LABEL: @vmulwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwev_d_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vmulwev_d_w(_1, _2); + } + // CHECK-LABEL: @vmulwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmulwev_w_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vmulwev_w_h(_1, _2); + } + // CHECK-LABEL: @vmulwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmulwev_h_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vmulwev_h_b(_1, _2); + } + // CHECK-LABEL: @vmulwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call 
<2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwod_d_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vmulwod_d_w(_1, _2); + } + // CHECK-LABEL: @vmulwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmulwod_w_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vmulwod_w_h(_1, _2); + } + // CHECK-LABEL: @vmulwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmulwod_h_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vmulwod_h_b(_1, _2); + } + // CHECK-LABEL: @vmulwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwev_d_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vmulwev_d_wu(_1, _2); + } + // CHECK-LABEL: @vmulwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmulwev_w_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vmulwev_w_hu(_1, _2); + } + // CHECK-LABEL: @vmulwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> 
[[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmulwev_h_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vmulwev_h_bu(_1, _2); + } + // CHECK-LABEL: @vmulwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwod_d_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vmulwod_d_wu(_1, _2); + } + // CHECK-LABEL: @vmulwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmulwod_w_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vmulwod_w_hu(_1, _2); + } + // CHECK-LABEL: @vmulwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmulwod_h_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vmulwod_h_bu(_1, _2); + } + // CHECK-LABEL: @vmulwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwev_d_wu_w(v4u32 _1, v4i32 _2) { + return __builtin_lsx_vmulwev_d_wu_w(_1, _2); + } + // CHECK-LABEL: @vmulwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> 
[[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmulwev_w_hu_h(v8u16 _1, v8i16 _2) { + return __builtin_lsx_vmulwev_w_hu_h(_1, _2); + } + // CHECK-LABEL: @vmulwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmulwev_h_bu_b(v16u8 _1, v16i8 _2) { + return __builtin_lsx_vmulwev_h_bu_b(_1, _2); + } + // CHECK-LABEL: @vmulwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwod_d_wu_w(v4u32 _1, v4i32 _2) { + return __builtin_lsx_vmulwod_d_wu_w(_1, _2); + } + // CHECK-LABEL: @vmulwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmulwod_w_hu_h(v8u16 _1, v8i16 _2) { + return __builtin_lsx_vmulwod_w_hu_h(_1, _2); + } + // CHECK-LABEL: @vmulwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmulwod_h_bu_b(v16u8 _1, v16i8 _2) { + return __builtin_lsx_vmulwod_h_bu_b(_1, _2); + } + // CHECK-LABEL: @vmulwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> 
[[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwev_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vmulwev_q_d(_1, _2); + } + // CHECK-LABEL: @vmulwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwod_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vmulwod_q_d(_1, _2); + } + // CHECK-LABEL: @vmulwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwev_q_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vmulwev_q_du(_1, _2); + } + // CHECK-LABEL: @vmulwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwod_q_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vmulwod_q_du(_1, _2); + } + // CHECK-LABEL: @vmulwev_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwev_q_du_d(v2u64 _1, v2i64 _2) { + return __builtin_lsx_vmulwev_q_du_d(_1, _2); + } + // CHECK-LABEL: @vmulwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x 
i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwod_q_du_d(v2u64 _1, v2i64 _2) { + return __builtin_lsx_vmulwod_q_du_d(_1, _2); + } + // CHECK-LABEL: @vhaddw_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vhaddw_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vhaddw_q_d(_1, _2); + } + // CHECK-LABEL: @vhaddw_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vhaddw_qu_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vhaddw_qu_du(_1, _2); + } + // CHECK-LABEL: @vhsubw_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vhsubw_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vhsubw_q_d(_1, _2); + } + // CHECK-LABEL: @vhsubw_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vhsubw_qu_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vhsubw_qu_du(_1, _2); + } + // CHECK-LABEL: @vmaddwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: 
[[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmaddwev_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { + return __builtin_lsx_vmaddwev_d_w(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4i32 vmaddwev_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { + return __builtin_lsx_vmaddwev_w_h(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8i16 vmaddwev_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { + return __builtin_lsx_vmaddwev_h_b(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2u64 vmaddwev_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { + return __builtin_lsx_vmaddwev_d_wu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> 
++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4u32 vmaddwev_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { + return __builtin_lsx_vmaddwev_w_hu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8u16 vmaddwev_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { + return __builtin_lsx_vmaddwev_h_bu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmaddwod_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { + return __builtin_lsx_vmaddwod_d_w(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4i32 vmaddwod_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { + return __builtin_lsx_vmaddwod_w_h(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to 
i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8i16 vmaddwod_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { + return __builtin_lsx_vmaddwod_h_b(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2u64 vmaddwod_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { + return __builtin_lsx_vmaddwod_d_wu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4u32 vmaddwod_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { + return __builtin_lsx_vmaddwod_w_hu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8u16 vmaddwod_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { + return __builtin_lsx_vmaddwod_h_bu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmaddwev_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { + return __builtin_lsx_vmaddwev_d_wu_w(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_w_hu_h( + 
// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4i32 vmaddwev_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { + return __builtin_lsx_vmaddwev_w_hu_h(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8i16 vmaddwev_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { + return __builtin_lsx_vmaddwev_h_bu_b(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmaddwod_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { + return __builtin_lsx_vmaddwod_d_wu_w(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4i32 vmaddwod_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { + return __builtin_lsx_vmaddwod_w_hu_h(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 
x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8i16 vmaddwod_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { + return __builtin_lsx_vmaddwod_h_bu_b(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmaddwev_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __builtin_lsx_vmaddwev_q_d(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmaddwod_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __builtin_lsx_vmaddwod_q_d(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2u64 vmaddwev_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { + return __builtin_lsx_vmaddwev_q_du(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 
x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2u64 vmaddwod_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { + return __builtin_lsx_vmaddwod_q_du(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmaddwev_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) { + return __builtin_lsx_vmaddwev_q_du_d(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmaddwod_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) { + return __builtin_lsx_vmaddwod_q_du_d(_1, _2, _3); + } + // CHECK-LABEL: @vrotr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vrotr_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vrotr_b(_1, _2); + } + // CHECK-LABEL: @vrotr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vrotr_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vrotr_h(_1, _2); + } + // CHECK-LABEL: @vrotr_w( + // CHECK-NEXT: entry: +-// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vrotr_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vrotr_w(_1, _2); + } + // CHECK-LABEL: @vrotr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vrotr_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vrotr_d(_1, _2); + } + // CHECK-LABEL: @vadd_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vadd_q(v2i64 _1, v2i64 _2) { return __builtin_lsx_vadd_q(_1, _2); } + // CHECK-LABEL: @vsub_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsub_q(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsub_q(_1, _2); } + // CHECK-LABEL: @vldrepl_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v16i8 vldrepl_b(void *_1) { return __builtin_lsx_vldrepl_b(_1, 1); } + // CHECK-LABEL: @vldrepl_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr [[_1:%.*]], i32 2) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v8i16 vldrepl_h(void *_1) { return __builtin_lsx_vldrepl_h(_1, 2); } + // CHECK-LABEL: @vldrepl_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr [[_1:%.*]], i32 4) +-// 
CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v4i32 vldrepl_w(void *_1) { return __builtin_lsx_vldrepl_w(_1, 4); } + // CHECK-LABEL: @vldrepl_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr [[_1:%.*]], i32 8) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v2i64 vldrepl_d(void *_1) { return __builtin_lsx_vldrepl_d(_1, 8); } + // CHECK-LABEL: @vmskgez_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vmskgez_b(v16i8 _1) { return __builtin_lsx_vmskgez_b(_1); } + // CHECK-LABEL: @vmsknz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vmsknz_b(v16i8 _1) { return __builtin_lsx_vmsknz_b(_1); } + // CHECK-LABEL: @vexth_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vexth_h_b(v16i8 _1) { return __builtin_lsx_vexth_h_b(_1); } + // CHECK-LABEL: @vexth_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vexth_w_h(v8i16 _1) { return __builtin_lsx_vexth_w_h(_1); } + // CHECK-LABEL: @vexth_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vexth_d_w(v4i32 _1) { return __builtin_lsx_vexth_d_w(_1); } + // CHECK-LABEL: @vexth_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: 
[[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vexth_q_d(v2i64 _1) { return __builtin_lsx_vexth_q_d(_1); } + // CHECK-LABEL: @vexth_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8u16 vexth_hu_bu(v16u8 _1) { return __builtin_lsx_vexth_hu_bu(_1); } + // CHECK-LABEL: @vexth_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vexth_wu_hu(v8u16 _1) { return __builtin_lsx_vexth_wu_hu(_1); } + // CHECK-LABEL: @vexth_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vexth_du_wu(v4u32 _1) { return __builtin_lsx_vexth_du_wu(_1); } + // CHECK-LABEL: @vexth_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vexth_qu_du(v2u64 _1) { return __builtin_lsx_vexth_qu_du(_1); } + // CHECK-LABEL: @vrotri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vrotri_b(v16i8 _1) { return __builtin_lsx_vrotri_b(_1, 1); } + // CHECK-LABEL: @vrotri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: 
[[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vrotri_h(v8i16 _1) { return __builtin_lsx_vrotri_h(_1, 1); } + // CHECK-LABEL: @vrotri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vrotri_w(v4i32 _1) { return __builtin_lsx_vrotri_w(_1, 1); } + // CHECK-LABEL: @vrotri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vrotri_d(v2i64 _1) { return __builtin_lsx_vrotri_d(_1, 1); } + // CHECK-LABEL: @vextl_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vextl_q_d(v2i64 _1) { return __builtin_lsx_vextl_q_d(_1); } + // CHECK-LABEL: @vsrlni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsrlni_b_h(_1, _2, 1); + } + // CHECK-LABEL: @vsrlni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrlni_h_w(_1, _2, 1); + } + // CHECK-LABEL: @vsrlni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x 
i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsrlni_w_d(_1, _2, 1); + } + // CHECK-LABEL: @vsrlni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsrlni_d_q(_1, _2, 1); + } + // CHECK-LABEL: @vsrlrni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsrlrni_b_h(_1, _2, 1); + } + // CHECK-LABEL: @vsrlrni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrlrni_h_w(_1, _2, 1); + } + // CHECK-LABEL: @vsrlrni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsrlrni_w_d(_1, _2, 1); + } + // CHECK-LABEL: @vsrlrni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] 
to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsrlrni_d_q(_1, _2, 1); + } + // CHECK-LABEL: @vssrlni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vssrlni_b_h(_1, _2, 1); + } + // CHECK-LABEL: @vssrlni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssrlni_h_w(_1, _2, 1); + } + // CHECK-LABEL: @vssrlni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssrlni_w_d(_1, _2, 1); + } + // CHECK-LABEL: @vssrlni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vssrlni_d_q(_1, _2, 1); + } + // CHECK-LABEL: @vssrlni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 
[[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2) { + return __builtin_lsx_vssrlni_bu_h(_1, _2, 1); + } + // CHECK-LABEL: @vssrlni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2) { + return __builtin_lsx_vssrlni_hu_w(_1, _2, 1); + } + // CHECK-LABEL: @vssrlni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2) { + return __builtin_lsx_vssrlni_wu_d(_1, _2, 1); + } + // CHECK-LABEL: @vssrlni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2) { + return __builtin_lsx_vssrlni_du_q(_1, _2, 1); + } + // CHECK-LABEL: @vssrlrni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vssrlrni_b_h(_1, _2, 1); + } + // CHECK-LABEL: @vssrlrni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// 
CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssrlrni_h_w(_1, _2, 1); + } + // CHECK-LABEL: @vssrlrni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssrlrni_w_d(_1, _2, 1); + } + // CHECK-LABEL: @vssrlrni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vssrlrni_d_q(_1, _2, 1); + } + // CHECK-LABEL: @vssrlrni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2) { + return __builtin_lsx_vssrlrni_bu_h(_1, _2, 1); + } + // CHECK-LABEL: @vssrlrni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2) { + return __builtin_lsx_vssrlrni_hu_w(_1, _2, 1); + } + // CHECK-LABEL: @vssrlrni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], 
i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2) { + return __builtin_lsx_vssrlrni_wu_d(_1, _2, 1); + } + // CHECK-LABEL: @vssrlrni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2) { + return __builtin_lsx_vssrlrni_du_q(_1, _2, 1); + } + // CHECK-LABEL: @vsrani_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrani_b_h(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsrani_b_h(_1, _2, 1); + } + // CHECK-LABEL: @vsrani_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrani_h_w(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrani_h_w(_1, _2, 1); + } + // CHECK-LABEL: @vsrani_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrani_w_d(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsrani_w_d(_1, _2, 1); + } + // CHECK-LABEL: @vsrani_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> [[_1:%.*]], <2 x 
i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsrani_d_q(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsrani_d_q(_1, _2, 1); + } + // CHECK-LABEL: @vsrarni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsrarni_b_h(_1, _2, 1); + } + // CHECK-LABEL: @vsrarni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrarni_h_w(_1, _2, 1); + } + // CHECK-LABEL: @vsrarni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsrarni_w_d(_1, _2, 1); + } + // CHECK-LABEL: @vsrarni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsrarni_d_q(_1, _2, 1); + } + // CHECK-LABEL: @vssrani_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> 
[[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssrani_b_h(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vssrani_b_h(_1, _2, 1); + } + // CHECK-LABEL: @vssrani_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssrani_h_w(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssrani_h_w(_1, _2, 1); + } + // CHECK-LABEL: @vssrani_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssrani_w_d(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssrani_w_d(_1, _2, 1); + } + // CHECK-LABEL: @vssrani_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vssrani_d_q(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vssrani_d_q(_1, _2, 1); + } + // CHECK-LABEL: @vssrani_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2) { + return __builtin_lsx_vssrani_bu_h(_1, _2, 1); + } + // CHECK-LABEL: @vssrani_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> 
@llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2) { + return __builtin_lsx_vssrani_hu_w(_1, _2, 1); + } + // CHECK-LABEL: @vssrani_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2) { + return __builtin_lsx_vssrani_wu_d(_1, _2, 1); + } + // CHECK-LABEL: @vssrani_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vssrani_du_q(v2u64 _1, v2i64 _2) { + return __builtin_lsx_vssrani_du_q(_1, _2, 1); + } + // CHECK-LABEL: @vssrarni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vssrarni_b_h(_1, _2, 1); + } + // CHECK-LABEL: @vssrarni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssrarni_h_w(_1, _2, 1); + } + // CHECK-LABEL: @vssrarni_w_d( + // CHECK-NEXT: entry: +-// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssrarni_w_d(_1, _2, 1); + } + // CHECK-LABEL: @vssrarni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vssrarni_d_q(_1, _2, 1); + } + // CHECK-LABEL: @vssrarni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2) { + return __builtin_lsx_vssrarni_bu_h(_1, _2, 1); + } + // CHECK-LABEL: @vssrarni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2) { + return __builtin_lsx_vssrarni_hu_w(_1, _2, 1); + } + // CHECK-LABEL: @vssrarni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2) { + return __builtin_lsx_vssrarni_wu_d(_1, _2, 1); + } + // 
CHECK-LABEL: @vssrarni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2) { + return __builtin_lsx_vssrarni_du_q(_1, _2, 1); + } + // CHECK-LABEL: @vpermi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vpermi_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vpermi_w(_1, _2, 1); +@@ -4660,79 +6390,107 @@ v4i32 vpermi_w(v4i32 _1, v4i32 _2) { + // CHECK-LABEL: @vld( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vld(ptr [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v16i8 vld(void *_1) { return __builtin_lsx_vld(_1, 1); } + // CHECK-LABEL: @vst( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vst(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1) ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vst(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i32 1) + // CHECK-NEXT: ret void + // + void vst(v16i8 _1, void *_2) { return __builtin_lsx_vst(_1, _2, 1); } + // CHECK-LABEL: @vssrlrn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssrlrn_b_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssrlrn_b_h(_1, _2); + } + // CHECK-LABEL: @vssrlrn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 
[[TMP3]] + // + v8i16 vssrlrn_h_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssrlrn_h_w(_1, _2); + } + // CHECK-LABEL: @vssrlrn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssrlrn_w_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vssrlrn_w_d(_1, _2); + } + // CHECK-LABEL: @vssrln_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssrln_b_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssrln_b_h(_1, _2); + } + // CHECK-LABEL: @vssrln_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssrln_h_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssrln_h_w(_1, _2); + } + // CHECK-LABEL: @vssrln_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssrln_w_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vssrln_w_d(_1, _2); + } + // CHECK-LABEL: @vorn_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vorn_v(v16i8 _1, v16i8 _2) { return __builtin_lsx_vorn_v(_1, _2); } + // 
CHECK-LABEL: @vldi( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldi(i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v2i64 vldi() { return __builtin_lsx_vldi(1); } + // CHECK-LABEL: @vshuf_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __builtin_lsx_vshuf_b(_1, _2, _3); +@@ -4740,429 +6498,575 @@ v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) { + // CHECK-LABEL: @vldx( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldx(ptr [[_1:%.*]], i64 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v16i8 vldx(void *_1) { return __builtin_lsx_vldx(_1, 1); } + // CHECK-LABEL: @vstx( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstx(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i64 1) ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstx(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i64 1) + // CHECK-NEXT: ret void + // + void vstx(v16i8 _1, void *_2) { return __builtin_lsx_vstx(_1, _2, 1); } + // CHECK-LABEL: @vextl_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vextl_qu_du(v2u64 _1) { return __builtin_lsx_vextl_qu_du(_1); } + // CHECK-LABEL: @bnz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bnz_b(v16u8 _1) { return __builtin_lsx_bnz_b(_1); } + // CHECK-LABEL: @bnz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bnz_d(v2u64 _1) { return __builtin_lsx_bnz_d(_1); } + // CHECK-LABEL: @bnz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 
@llvm.loongarch.lsx.bnz.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bnz_h(v8u16 _1) { return __builtin_lsx_bnz_h(_1); } + // CHECK-LABEL: @bnz_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bnz_v(v16u8 _1) { return __builtin_lsx_bnz_v(_1); } + // CHECK-LABEL: @bnz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bnz_w(v4u32 _1) { return __builtin_lsx_bnz_w(_1); } + // CHECK-LABEL: @bz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bz_b(v16u8 _1) { return __builtin_lsx_bz_b(_1); } + // CHECK-LABEL: @bz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bz_d(v2u64 _1) { return __builtin_lsx_bz_d(_1); } + // CHECK-LABEL: @bz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bz_h(v8u16 _1) { return __builtin_lsx_bz_h(_1); } + // CHECK-LABEL: @bz_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bz_v(v16u8 _1) { return __builtin_lsx_bz_v(_1); } + // CHECK-LABEL: @bz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bz_w(v4u32 _1) { return __builtin_lsx_bz_w(_1); } + // CHECK-LABEL: @vfcmp_caf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> 
@llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_caf_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_caf_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_caf_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_caf_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_caf_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_ceq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_ceq_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_ceq_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_ceq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_ceq_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_ceq_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_cle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_cle_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cle_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_cle_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call 
<4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_cle_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cle_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_clt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_clt_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_clt_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_clt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_clt_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_clt_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_cne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_cne_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cne_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_cne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_cne_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cne_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_cor_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail 
call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_cor_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cor_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_cor_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_cor_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cor_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_cueq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_cueq_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cueq_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_cueq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_cueq_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cueq_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_cule_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_cule_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cule_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_cule_s( + // CHECK-NEXT: entry: +-// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_cule_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cule_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_cult_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_cult_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cult_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_cult_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_cult_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cult_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_cun_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_cun_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cun_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_cune_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_cune_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cune_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_cune_s( 
+ // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_cune_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cune_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_cun_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_cun_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cun_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_saf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_saf_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_saf_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_saf_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_saf_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_saf_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_seq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_seq_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_seq_d(_1, _2); + } + // CHECK-LABEL: 
@vfcmp_seq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_seq_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_seq_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_sle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_sle_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sle_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_sle_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_sle_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sle_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_slt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_slt_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_slt_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_slt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_slt_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_slt_s(_1, _2); + } + // 
CHECK-LABEL: @vfcmp_sne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_sne_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sne_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_sne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_sne_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sne_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_sor_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_sor_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sor_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_sor_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_sor_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sor_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_sueq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_sueq_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sueq_d(_1, 
_2); + } + // CHECK-LABEL: @vfcmp_sueq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_sueq_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sueq_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_sule_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_sule_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sule_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_sule_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_sule_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sule_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_sult_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_sult_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sult_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_sult_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_sult_s(v4f32 _1, v4f32 _2) { + return 
__builtin_lsx_vfcmp_sult_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_sun_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_sun_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sun_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_sune_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_sune_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sune_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_sune_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_sune_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sune_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_sun_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_sun_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sun_s(_1, _2); +@@ -5170,24 +7074,28 @@ v4i32 vfcmp_sun_s(v4f32 _1, v4f32 _2) { + // CHECK-LABEL: @vrepli_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v16i8 vrepli_b() { return __builtin_lsx_vrepli_b(1); } + // CHECK-LABEL: @vrepli_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 
++// CHECK-NEXT: ret i128 [[TMP1]] + // + v2i64 vrepli_d() { return __builtin_lsx_vrepli_d(1); } + // CHECK-LABEL: @vrepli_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v8i16 vrepli_h() { return __builtin_lsx_vrepli_h(1); } + // CHECK-LABEL: @vrepli_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v4i32 vrepli_w() { return __builtin_lsx_vrepli_w(1); } +-- +2.20.1 + diff --git a/0037-LoongArch-Permit-auto-vectorization-using-LSX-LASX-w.patch b/0037-LoongArch-Permit-auto-vectorization-using-LSX-LASX-w.patch new file mode 100644 index 0000000..1dfe076 --- /dev/null +++ b/0037-LoongArch-Permit-auto-vectorization-using-LSX-LASX-w.patch @@ -0,0 +1,188 @@ +From 959a4cd22a727480621a4dfbbdc2d2a61905dbe8 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Tue, 23 Jan 2024 09:06:35 +0800 +Subject: [PATCH 37/42] [LoongArch] Permit auto-vectorization using LSX/LASX + with `auto-vec` feature (#78943) + +With enough codegen complete, we can now correctly report the size of +vector registers for LSX/LASX, allowing auto vectorization (The +`auto-vec` feature needs to be enabled simultaneously). + +As described, the `auto-vec` feature is an experimental one. To ensure +that automatic vectorization is not enabled by default, because the +information provided by the current `TTI` cannot yield additional +benefits for automatic vectorization. + +(cherry picked from commit fcff4582f01db2f5a99e3acf452aec9f2d8a126a) + +--- + llvm/lib/Target/LoongArch/LoongArch.td | 4 ++ + .../lib/Target/LoongArch/LoongArchSubtarget.h | 2 + + .../LoongArchTargetTransformInfo.cpp | 18 +++++ + .../LoongArch/LoongArchTargetTransformInfo.h | 2 + + .../LoopVectorize/LoongArch/defaults.ll | 66 +++++++++++++++++++ + .../LoopVectorize/LoongArch/lit.local.cfg | 4 ++ + 6 files changed, 96 insertions(+) + create mode 100644 llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll + create mode 100644 llvm/test/Transforms/LoopVectorize/LoongArch/lit.local.cfg + +diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td +index 75b65fe69f26..2a4c991a43b0 100644 +--- a/llvm/lib/Target/LoongArch/LoongArch.td ++++ b/llvm/lib/Target/LoongArch/LoongArch.td +@@ -105,6 +105,10 @@ def FeatureUAL + def FeatureRelax + : SubtargetFeature<"relax", "HasLinkerRelax", "true", + "Enable Linker relaxation">; ++// Experimental auto vectorization ++def FeatureAutoVec ++ : SubtargetFeature<"auto-vec", "HasExpAutoVec", "true", ++ "Experimental auto vectorization">; + + //===----------------------------------------------------------------------===// + // Registers, instruction descriptions ... 
+diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h +index 5c173675cca4..174e4cba8326 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h ++++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h +@@ -44,6 +44,7 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo { + bool HasLaLocalWithAbs = false; + bool HasUAL = false; + bool HasLinkerRelax = false; ++ bool HasExpAutoVec = false; + unsigned GRLen = 32; + MVT GRLenVT = MVT::i32; + LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown; +@@ -102,6 +103,7 @@ public: + bool hasLaLocalWithAbs() const { return HasLaLocalWithAbs; } + bool hasUAL() const { return HasUAL; } + bool hasLinkerRelax() const { return HasLinkerRelax; } ++ bool hasExpAutoVec() const { return HasExpAutoVec; } + MVT getGRLenVT() const { return GRLenVT; } + unsigned getGRLen() const { return GRLen; } + LoongArchABI::ABI getTargetABI() const { return TargetABI; } +diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp +index a6de86eea116..04349aa52b54 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp +@@ -19,4 +19,22 @@ using namespace llvm; + + #define DEBUG_TYPE "loongarchtti" + ++TypeSize LoongArchTTIImpl::getRegisterBitWidth( ++ TargetTransformInfo::RegisterKind K) const { ++ switch (K) { ++ case TargetTransformInfo::RGK_Scalar: ++ return TypeSize::getFixed(ST->is64Bit() ? 64 : 32); ++ case TargetTransformInfo::RGK_FixedWidthVector: ++ if (ST->hasExtLASX() && ST->hasExpAutoVec()) ++ return TypeSize::getFixed(256); ++ if (ST->hasExtLSX() && ST->hasExpAutoVec()) ++ return TypeSize::getFixed(128); ++ return TypeSize::getFixed(0); ++ case TargetTransformInfo::RGK_ScalableVector: ++ return TypeSize::getScalable(0); ++ } ++ ++ llvm_unreachable("Unsupported register kind"); ++} ++ + // TODO: Implement more hooks to provide TTI machinery for LoongArch. +diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h +index 9e02f793ba8a..d296c9ed576f 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h ++++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h +@@ -39,6 +39,8 @@ public: + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} + ++ TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const; ++ + // TODO: Implement more hooks to provide TTI machinery for LoongArch. + }; + +diff --git a/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll b/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll +new file mode 100644 +index 000000000000..a8ac2411dd82 +--- /dev/null ++++ b/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll +@@ -0,0 +1,66 @@ ++; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ++; RUN: opt < %s -passes=loop-vectorize -mtriple loongarch64-linux-gnu -mattr=+lasx,+auto-vec -S | FileCheck %s ++ ++;; This is a collection of tests whose only purpose is to show changes in the ++;; default configuration. Please keep these tests minimal - if you're testing ++;; functionality of some specific configuration, please place that in a ++;; seperate test file with a hard coded configuration (even if that ++;; configuration is the current default). 
++ ++target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" ++target triple = "loongarch64" ++ ++define void @vector_add(ptr noalias nocapture %a, i64 %v) { ++; CHECK-LABEL: define void @vector_add ++; CHECK-SAME: (ptr noalias nocapture [[A:%.*]], i64 [[V:%.*]]) #[[ATTR0:[0-9]+]] { ++; CHECK-NEXT: entry: ++; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ++; CHECK: vector.ph: ++; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i64 0 ++; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ++; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ++; CHECK: vector.body: ++; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ++; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ++; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] ++; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 ++; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ++; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] ++; CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[TMP2]], align 8 ++; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ++; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ++; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ++; CHECK: middle.block: ++; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 ++; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ++; CHECK: scalar.ph: ++; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ++; CHECK-NEXT: br label [[FOR_BODY:%.*]] ++; CHECK: for.body: ++; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ++; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ++; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 ++; CHECK-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V]] ++; CHECK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8 ++; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ++; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 ++; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ++; CHECK: for.end: ++; CHECK-NEXT: ret void ++; ++entry: ++ br label %for.body ++ ++for.body: ++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] ++ %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv ++ %elem = load i64, ptr %arrayidx ++ %add = add i64 %elem, %v ++ store i64 %add, ptr %arrayidx ++ %iv.next = add nuw nsw i64 %iv, 1 ++ %exitcond.not = icmp eq i64 %iv.next, 1024 ++ br i1 %exitcond.not, label %for.end, label %for.body ++ ++for.end: ++ ret void ++} +diff --git a/llvm/test/Transforms/LoopVectorize/LoongArch/lit.local.cfg b/llvm/test/Transforms/LoopVectorize/LoongArch/lit.local.cfg +new file mode 100644 +index 000000000000..9570af17fe5f +--- /dev/null ++++ b/llvm/test/Transforms/LoopVectorize/LoongArch/lit.local.cfg +@@ -0,0 +1,4 @@ ++config.suffixes = [".ll"] ++ ++if not "LoongArch" in config.root.targets: ++ config.unsupported = True +-- +2.20.1 + diff --git a/0038-CodeGen-LoongArch-Set-SINT_TO_FP-UINT_TO_FP-to-legal.patch b/0038-CodeGen-LoongArch-Set-SINT_TO_FP-UINT_TO_FP-to-legal.patch new file mode 100644 index 0000000..1a283a7 --- /dev/null +++ 
b/0038-CodeGen-LoongArch-Set-SINT_TO_FP-UINT_TO_FP-to-legal.patch @@ -0,0 +1,298 @@ +From a5bc7ef181511a199bf6e042f02c431ad667b52a Mon Sep 17 00:00:00 2001 +From: yjijd +Date: Tue, 23 Jan 2024 15:16:23 +0800 +Subject: [PATCH 38/42] [CodeGen][LoongArch] Set SINT_TO_FP/UINT_TO_FP to legal + for vector types (#78924) + +Support the following conversions: +v4i32->v4f32, v2i64->v2f64(LSX) +v8i32->v8f32, v4i64->v4f64(LASX) +v4i32->v4f64, v4i64->v4f32(LASX) + +(cherry picked from commit f799f936929c232a16abc7c520a10fecadbf05f9) + +--- + .../LoongArch/LoongArchISelLowering.cpp | 4 ++ + .../LoongArch/LoongArchLASXInstrInfo.td | 22 +++++++ + .../Target/LoongArch/LoongArchLSXInstrInfo.td | 8 +++ + .../LoongArch/lasx/ir-instruction/sitofp.ll | 57 +++++++++++++++++++ + .../LoongArch/lasx/ir-instruction/uitofp.ll | 57 +++++++++++++++++++ + .../LoongArch/lsx/ir-instruction/sitofp.ll | 28 +++++++++ + .../LoongArch/lsx/ir-instruction/uitofp.ll | 28 +++++++++ + 7 files changed, 204 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sitofp.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/uitofp.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sitofp.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/uitofp.ll + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index cf881ce720a6..7a360b42e15d 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -256,6 +256,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, + Expand); + } ++ setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, ++ {MVT::v4i32, MVT::v2i64}, Legal); + for (MVT VT : {MVT::v4f32, MVT::v2f64}) { + setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); + setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); +@@ -298,6 +300,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, + Expand); + } ++ setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, ++ {MVT::v8i32, MVT::v4i32, MVT::v4i64}, Legal); + for (MVT VT : {MVT::v8f32, MVT::v4f64}) { + setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); + setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index b3c11bc5423d..b3e74b480922 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -1611,6 +1611,28 @@ foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in + def : Pat<(fneg (v8f32 LASX256:$xj)), (XVBITREVI_W LASX256:$xj, 31)>; + def : Pat<(fneg (v4f64 LASX256:$xj)), (XVBITREVI_D LASX256:$xj, 63)>; + ++// XVFFINT_{S_W/D_L} ++def : Pat<(v8f32 (sint_to_fp v8i32:$vj)), (XVFFINT_S_W v8i32:$vj)>; ++def : Pat<(v4f64 (sint_to_fp v4i64:$vj)), (XVFFINT_D_L v4i64:$vj)>; ++def : Pat<(v4f64 (sint_to_fp v4i32:$vj)), ++ (XVFFINT_D_L (VEXT2XV_D_W (SUBREG_TO_REG (i64 0), v4i32:$vj, ++ sub_128)))>; ++def : Pat<(v4f32 (sint_to_fp v4i64:$vj)), ++ (EXTRACT_SUBREG (XVFCVT_S_D (XVPERMI_D (XVFFINT_D_L v4i64:$vj), 238), ++ (XVFFINT_D_L v4i64:$vj)), ++ sub_128)>; ++ ++// XVFFINT_{S_WU/D_LU} ++def : Pat<(v8f32 (uint_to_fp v8i32:$vj)), (XVFFINT_S_WU v8i32:$vj)>; ++def : Pat<(v4f64 (uint_to_fp 
v4i64:$vj)), (XVFFINT_D_LU v4i64:$vj)>; ++def : Pat<(v4f64 (uint_to_fp v4i32:$vj)), ++ (XVFFINT_D_LU (VEXT2XV_DU_WU (SUBREG_TO_REG (i64 0), v4i32:$vj, ++ sub_128)))>; ++def : Pat<(v4f32 (uint_to_fp v4i64:$vj)), ++ (EXTRACT_SUBREG (XVFCVT_S_D (XVPERMI_D (XVFFINT_D_LU v4i64:$vj), 238), ++ (XVFFINT_D_LU v4i64:$vj)), ++ sub_128)>; ++ + } // Predicates = [HasExtLASX] + + /// Intrinsic pattern +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index 5569c2cd15b5..63eac4d1aeb7 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -1742,6 +1742,14 @@ foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in + def : Pat<(fneg (v4f32 LSX128:$vj)), (VBITREVI_W LSX128:$vj, 31)>; + def : Pat<(fneg (v2f64 LSX128:$vj)), (VBITREVI_D LSX128:$vj, 63)>; + ++// VFFINT_{S_W/D_L} ++def : Pat<(v4f32 (sint_to_fp v4i32:$vj)), (VFFINT_S_W v4i32:$vj)>; ++def : Pat<(v2f64 (sint_to_fp v2i64:$vj)), (VFFINT_D_L v2i64:$vj)>; ++ ++// VFFINT_{S_WU/D_LU} ++def : Pat<(v4f32 (uint_to_fp v4i32:$vj)), (VFFINT_S_WU v4i32:$vj)>; ++def : Pat<(v2f64 (uint_to_fp v2i64:$vj)), (VFFINT_D_LU v2i64:$vj)>; ++ + } // Predicates = [HasExtLSX] + + /// Intrinsic pattern +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sitofp.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sitofp.ll +new file mode 100644 +index 000000000000..208a758ea4e9 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sitofp.ll +@@ -0,0 +1,57 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @sitofp_v8i32_v8f32(ptr %res, ptr %in){ ++; CHECK-LABEL: sitofp_v8i32_v8f32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvffint.s.w $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %in ++ %v1 = sitofp <8 x i32> %v0 to <8 x float> ++ store <8 x float> %v1, ptr %res ++ ret void ++} ++ ++define void @sitofp_v4f64_v4f64(ptr %res, ptr %in){ ++; CHECK-LABEL: sitofp_v4f64_v4f64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvffint.d.l $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %in ++ %v1 = sitofp <4 x i64> %v0 to <4 x double> ++ store <4 x double> %v1, ptr %res ++ ret void ++} ++ ++define void @sitofp_v4i64_v4f32(ptr %res, ptr %in){ ++; CHECK-LABEL: sitofp_v4i64_v4f32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvffint.d.l $xr0, $xr0 ++; CHECK-NEXT: xvpermi.d $xr1, $xr0, 238 ++; CHECK-NEXT: xvfcvt.s.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %in ++ %v1 = sitofp <4 x i64> %v0 to <4 x float> ++ store <4 x float> %v1, ptr %res ++ ret void ++} ++ ++define void @sitofp_v4i32_v4f64(ptr %res, ptr %in){ ++; CHECK-LABEL: sitofp_v4i32_v4f64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vext2xv.d.w $xr0, $xr0 ++; CHECK-NEXT: xvffint.d.l $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %in ++ %v1 = sitofp <4 x i32> %v0 to <4 x double> ++ store <4 x double> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/uitofp.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/uitofp.ll +new file mode 100644 +index 000000000000..70cf71c4cec2 +--- /dev/null ++++ 
b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/uitofp.ll +@@ -0,0 +1,57 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @uitofp_v8i32_v8f32(ptr %res, ptr %in){ ++; CHECK-LABEL: uitofp_v8i32_v8f32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvffint.s.wu $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %in ++ %v1 = uitofp <8 x i32> %v0 to <8 x float> ++ store <8 x float> %v1, ptr %res ++ ret void ++} ++ ++define void @uitofp_v4f64_v4f64(ptr %res, ptr %in){ ++; CHECK-LABEL: uitofp_v4f64_v4f64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvffint.d.lu $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %in ++ %v1 = uitofp <4 x i64> %v0 to <4 x double> ++ store <4 x double> %v1, ptr %res ++ ret void ++} ++ ++define void @uitofp_v4i64_v4f32(ptr %res, ptr %in){ ++; CHECK-LABEL: uitofp_v4i64_v4f32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvffint.d.lu $xr0, $xr0 ++; CHECK-NEXT: xvpermi.d $xr1, $xr0, 238 ++; CHECK-NEXT: xvfcvt.s.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %in ++ %v1 = uitofp <4 x i64> %v0 to <4 x float> ++ store <4 x float> %v1, ptr %res ++ ret void ++} ++ ++define void @uitofp_v4i32_v4f64(ptr %res, ptr %in){ ++; CHECK-LABEL: uitofp_v4i32_v4f64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vext2xv.du.wu $xr0, $xr0 ++; CHECK-NEXT: xvffint.d.lu $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %in ++ %v1 = uitofp <4 x i32> %v0 to <4 x double> ++ store <4 x double> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sitofp.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sitofp.ll +new file mode 100644 +index 000000000000..1e820a37a240 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sitofp.ll +@@ -0,0 +1,28 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @sitofp_v4i32_v4f32(ptr %res, ptr %in){ ++; CHECK-LABEL: sitofp_v4i32_v4f32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vffint.s.w $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %in ++ %v1 = sitofp <4 x i32> %v0 to <4 x float> ++ store <4 x float> %v1, ptr %res ++ ret void ++} ++ ++define void @sitofp_v2i64_v2f64(ptr %res, ptr %in){ ++; CHECK-LABEL: sitofp_v2i64_v2f64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vffint.d.l $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %in ++ %v1 = sitofp <2 x i64> %v0 to <2 x double> ++ store <2 x double> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/uitofp.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/uitofp.ll +new file mode 100644 +index 000000000000..3d4913f12e57 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/uitofp.ll +@@ -0,0 +1,28 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @uitofp_v4i32_v4f32(ptr %res, ptr 
%in){ ++; CHECK-LABEL: uitofp_v4i32_v4f32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vffint.s.wu $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %in ++ %v1 = uitofp <4 x i32> %v0 to <4 x float> ++ store <4 x float> %v1, ptr %res ++ ret void ++} ++ ++define void @uitofp_v2i64_v2f64(ptr %res, ptr %in){ ++; CHECK-LABEL: uitofp_v2i64_v2f64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vffint.d.lu $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %in ++ %v1 = uitofp <2 x i64> %v0 to <2 x double> ++ store <2 x double> %v1, ptr %res ++ ret void ++} +-- +2.20.1 + diff --git a/0039-CodeGen-LoongArch-Set-FP_TO_SINT-FP_TO_UINT-to-legal.patch b/0039-CodeGen-LoongArch-Set-FP_TO_SINT-FP_TO_UINT-to-legal.patch new file mode 100644 index 0000000..80e154f --- /dev/null +++ b/0039-CodeGen-LoongArch-Set-FP_TO_SINT-FP_TO_UINT-to-legal.patch @@ -0,0 +1,306 @@ +From 8f6d308508cd7e8568df5268230cf60d47cd9dbe Mon Sep 17 00:00:00 2001 +From: yjijd +Date: Tue, 23 Jan 2024 15:57:06 +0800 +Subject: [PATCH 39/42] [CodeGen][LoongArch] Set FP_TO_SINT/FP_TO_UINT to legal + for vector types (#79107) + +Support the following conversions: +v4f32->v4i32, v2f64->v2i64(LSX) +v8f32->v8i32, v4f64->v4i64(LASX) +v4f32->v4i64, v4f64->v4i32(LASX) + +(cherry picked from commit 44ba6ebc999d6e9b27bedfe04a993adfd204dc6a) + +--- + .../LoongArch/LoongArchISelLowering.cpp | 12 ++-- + .../LoongArch/LoongArchLASXInstrInfo.td | 22 +++++++ + .../Target/LoongArch/LoongArchLSXInstrInfo.td | 8 +++ + .../LoongArch/lasx/ir-instruction/fptosi.ll | 57 +++++++++++++++++++ + .../LoongArch/lasx/ir-instruction/fptoui.ll | 57 +++++++++++++++++++ + .../LoongArch/lsx/ir-instruction/fptosi.ll | 28 +++++++++ + .../LoongArch/lsx/ir-instruction/fptoui.ll | 28 +++++++++ + 7 files changed, 208 insertions(+), 4 deletions(-) + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptosi.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptoui.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptosi.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptoui.ll + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index 7a360b42e15d..f7eacd56c542 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -256,8 +256,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, + Expand); + } +- setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, +- {MVT::v4i32, MVT::v2i64}, Legal); ++ for (MVT VT : {MVT::v4i32, MVT::v2i64}) { ++ setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal); ++ setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal); ++ } + for (MVT VT : {MVT::v4f32, MVT::v2f64}) { + setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); + setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); +@@ -300,8 +302,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, + Expand); + } +- setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, +- {MVT::v8i32, MVT::v4i32, MVT::v4i64}, Legal); ++ for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) { ++ setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal); ++ 
setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal); ++ } + for (MVT VT : {MVT::v8f32, MVT::v4f64}) { + setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); + setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index b3e74b480922..492b62da6ce7 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -1633,6 +1633,28 @@ def : Pat<(v4f32 (uint_to_fp v4i64:$vj)), + (XVFFINT_D_LU v4i64:$vj)), + sub_128)>; + ++// XVFTINTRZ_{W_S/L_D} ++def : Pat<(v8i32 (fp_to_sint v8f32:$vj)), (XVFTINTRZ_W_S v8f32:$vj)>; ++def : Pat<(v4i64 (fp_to_sint v4f64:$vj)), (XVFTINTRZ_L_D v4f64:$vj)>; ++def : Pat<(v4i64 (fp_to_sint v4f32:$vj)), ++ (VEXT2XV_D_W (SUBREG_TO_REG (i64 0), (VFTINTRZ_W_S v4f32:$vj), ++ sub_128))>; ++def : Pat<(v4i32 (fp_to_sint (v4f64 LASX256:$vj))), ++ (EXTRACT_SUBREG (XVFTINTRZ_W_S (XVFCVT_S_D (XVPERMI_D v4f64:$vj, 238), ++ v4f64:$vj)), ++ sub_128)>; ++ ++// XVFTINTRZ_{W_SU/L_DU} ++def : Pat<(v8i32 (fp_to_uint v8f32:$vj)), (XVFTINTRZ_WU_S v8f32:$vj)>; ++def : Pat<(v4i64 (fp_to_uint v4f64:$vj)), (XVFTINTRZ_LU_D v4f64:$vj)>; ++def : Pat<(v4i64 (fp_to_uint v4f32:$vj)), ++ (VEXT2XV_DU_WU (SUBREG_TO_REG (i64 0), (VFTINTRZ_WU_S v4f32:$vj), ++ sub_128))>; ++def : Pat<(v4i32 (fp_to_uint (v4f64 LASX256:$vj))), ++ (EXTRACT_SUBREG (XVFTINTRZ_W_S (XVFCVT_S_D (XVPERMI_D v4f64:$vj, 238), ++ v4f64:$vj)), ++ sub_128)>; ++ + } // Predicates = [HasExtLASX] + + /// Intrinsic pattern +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index 63eac4d1aeb7..99ac2f3c162f 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -1750,6 +1750,14 @@ def : Pat<(v2f64 (sint_to_fp v2i64:$vj)), (VFFINT_D_L v2i64:$vj)>; + def : Pat<(v4f32 (uint_to_fp v4i32:$vj)), (VFFINT_S_WU v4i32:$vj)>; + def : Pat<(v2f64 (uint_to_fp v2i64:$vj)), (VFFINT_D_LU v2i64:$vj)>; + ++// VFTINTRZ_{W_S/L_D} ++def : Pat<(v4i32 (fp_to_sint v4f32:$vj)), (VFTINTRZ_W_S v4f32:$vj)>; ++def : Pat<(v2i64 (fp_to_sint v2f64:$vj)), (VFTINTRZ_L_D v2f64:$vj)>; ++ ++// VFTINTRZ_{W_SU/L_DU} ++def : Pat<(v4i32 (fp_to_uint v4f32:$vj)), (VFTINTRZ_WU_S v4f32:$vj)>; ++def : Pat<(v2i64 (fp_to_uint v2f64:$vj)), (VFTINTRZ_LU_D v2f64:$vj)>; ++ + } // Predicates = [HasExtLSX] + + /// Intrinsic pattern +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptosi.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptosi.ll +new file mode 100644 +index 000000000000..0d9f57b57ffa +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptosi.ll +@@ -0,0 +1,57 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @fptosi_v8f32_v8i32(ptr %res, ptr %in){ ++; CHECK-LABEL: fptosi_v8f32_v8i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvftintrz.w.s $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %in ++ %v1 = fptosi <8 x float> %v0 to <8 x i32> ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @fptosi_v4f64_v4i64(ptr %res, ptr %in){ ++; CHECK-LABEL: fptosi_v4f64_v4i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvftintrz.l.d $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 
++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %in ++ %v1 = fptosi <4 x double> %v0 to <4 x i64> ++ store <4 x i64> %v1, ptr %res ++ ret void ++} ++ ++define void @fptosi_v4f64_v4i32(ptr %res, ptr %in){ ++; CHECK-LABEL: fptosi_v4f64_v4i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvpermi.d $xr1, $xr0, 238 ++; CHECK-NEXT: xvfcvt.s.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvftintrz.w.s $xr0, $xr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %in ++ %v1 = fptosi <4 x double> %v0 to <4 x i32> ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @fptosi_v4f32_v4i64(ptr %res, ptr %in){ ++; CHECK-LABEL: fptosi_v4f32_v4i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vftintrz.w.s $vr0, $vr0 ++; CHECK-NEXT: vext2xv.d.w $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %in ++ %v1 = fptosi <4 x float> %v0 to <4 x i64> ++ store <4 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptoui.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptoui.ll +new file mode 100644 +index 000000000000..27d70f33cd34 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptoui.ll +@@ -0,0 +1,57 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @fptoui_v8f32_v8i32(ptr %res, ptr %in){ ++; CHECK-LABEL: fptoui_v8f32_v8i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvftintrz.wu.s $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %in ++ %v1 = fptoui <8 x float> %v0 to <8 x i32> ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @fptoui_v4f64_v4i64(ptr %res, ptr %in){ ++; CHECK-LABEL: fptoui_v4f64_v4i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvftintrz.lu.d $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %in ++ %v1 = fptoui <4 x double> %v0 to <4 x i64> ++ store <4 x i64> %v1, ptr %res ++ ret void ++} ++ ++define void @fptoui_v4f64_v4i32(ptr %res, ptr %in){ ++; CHECK-LABEL: fptoui_v4f64_v4i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvpermi.d $xr1, $xr0, 238 ++; CHECK-NEXT: xvfcvt.s.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvftintrz.w.s $xr0, $xr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %in ++ %v1 = fptoui <4 x double> %v0 to <4 x i32> ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @fptoui_v4f32_v4i64(ptr %res, ptr %in){ ++; CHECK-LABEL: fptoui_v4f32_v4i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vftintrz.wu.s $vr0, $vr0 ++; CHECK-NEXT: vext2xv.du.wu $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %in ++ %v1 = fptoui <4 x float> %v0 to <4 x i64> ++ store <4 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptosi.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptosi.ll +new file mode 100644 +index 000000000000..c3008fe96e47 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptosi.ll +@@ -0,0 +1,28 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ 
++define void @fptosi_v4f32_v4i32(ptr %res, ptr %in){ ++; CHECK-LABEL: fptosi_v4f32_v4i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vftintrz.w.s $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %in ++ %v1 = fptosi <4 x float> %v0 to <4 x i32> ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @fptosi_v2f64_v2i64(ptr %res, ptr %in){ ++; CHECK-LABEL: fptosi_v2f64_v2i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vftintrz.l.d $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %in ++ %v1 = fptosi <2 x double> %v0 to <2 x i64> ++ store <2 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptoui.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptoui.ll +new file mode 100644 +index 000000000000..f0aeb0bd14e7 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptoui.ll +@@ -0,0 +1,28 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @fptoui_v4f32_v4i32(ptr %res, ptr %in){ ++; CHECK-LABEL: fptoui_v4f32_v4i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vftintrz.wu.s $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %in ++ %v1 = fptoui <4 x float> %v0 to <4 x i32> ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @fptoui_v2f64_v2i64(ptr %res, ptr %in){ ++; CHECK-LABEL: fptoui_v2f64_v2i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vftintrz.lu.d $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %in ++ %v1 = fptoui <2 x double> %v0 to <2 x i64> ++ store <2 x i64> %v1, ptr %res ++ ret void ++} +-- +2.20.1 + diff --git a/0040-LoongArch-Fixing-the-incorrect-return-value-of-Loong.patch b/0040-LoongArch-Fixing-the-incorrect-return-value-of-Loong.patch new file mode 100644 index 0000000..2e4e54a --- /dev/null +++ b/0040-LoongArch-Fixing-the-incorrect-return-value-of-Loong.patch @@ -0,0 +1,52 @@ +From 8da55d54445ddecf8a751368828d46425afc4ce6 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Fri, 26 Jan 2024 10:24:07 +0800 +Subject: [PATCH 40/42] [LoongArch] Fixing the incorrect return value of + LoongArchTTIImpl::getRegisterBitWidth (#79441) + +When we do not enable vector features, we should return the default +value (`TargetTransformInfoImplBase::getRegisterBitWidth`) instead of +zero. 
+ +This should fix the LoongArch [buildbot +breakage](https://lab.llvm.org/staging/#/builders/5/builds/486) from + +(cherry picked from commit 1e9924c1f248bbddcb95d82a59708d617297dad3) +(cherry picked from commit 900e7cbfdee09c94d022e4dae923b3c7827f95e3) + +--- + .../Target/LoongArch/LoongArchTargetTransformInfo.cpp | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp +index 04349aa52b54..d47dded9ea6e 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp +@@ -21,17 +21,20 @@ using namespace llvm; + + TypeSize LoongArchTTIImpl::getRegisterBitWidth( + TargetTransformInfo::RegisterKind K) const { ++ TypeSize DefSize = TargetTransformInfoImplBase::getRegisterBitWidth(K); + switch (K) { + case TargetTransformInfo::RGK_Scalar: + return TypeSize::getFixed(ST->is64Bit() ? 64 : 32); + case TargetTransformInfo::RGK_FixedWidthVector: +- if (ST->hasExtLASX() && ST->hasExpAutoVec()) ++ if (!ST->hasExpAutoVec()) ++ return DefSize; ++ if (ST->hasExtLASX()) + return TypeSize::getFixed(256); +- if (ST->hasExtLSX() && ST->hasExpAutoVec()) ++ if (ST->hasExtLSX()) + return TypeSize::getFixed(128); +- return TypeSize::getFixed(0); ++ [[fallthrough]]; + case TargetTransformInfo::RGK_ScalableVector: +- return TypeSize::getScalable(0); ++ return DefSize; + } + + llvm_unreachable("Unsupported register kind"); +-- +2.20.1 + diff --git a/0041-LoongArch-Pre-commit-test-for-76913.-NFC.patch b/0041-LoongArch-Pre-commit-test-for-76913.-NFC.patch new file mode 100644 index 0000000..6614c82 --- /dev/null +++ b/0041-LoongArch-Pre-commit-test-for-76913.-NFC.patch @@ -0,0 +1,71 @@ +From c27980fffb9082f2dfed259f0ef441badf3d7cb7 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Tue, 9 Jan 2024 20:32:20 +0800 +Subject: [PATCH 41/42] [LoongArch] Pre-commit test for #76913. NFC + +This test will crash with expensive check. + +Crash message: +``` +*** Bad machine code: Using an undefined physical register *** +- function: main +- basic block: %bb.0 entry (0x20fee70) +- instruction: $r3 = frame-destroy ADDI_D $r22, -288 +- operand 1: $r22 +``` + +(cherry picked from commit f499472de3e1184b83fc6cd78bc244a55f2cac7d) + +--- + .../LoongArch/can-not-realign-stack.ll | 39 +++++++++++++++++++ + 1 file changed, 39 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll + +diff --git a/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll b/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll +new file mode 100644 +index 000000000000..526821076498 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll +@@ -0,0 +1,39 @@ ++; REQUIRES: expensive_checks ++; RUN: llc --mtriple=loongarch64 --frame-pointer=none --mattr=+lasx < %s ++ ++; XFAIL: * ++ ++;; FIXME: This test will crash with expensive check. The subsequent patch will ++;; address and fix this issue. 
++ ++%struct.S = type { [64 x i16] } ++ ++define dso_local noundef signext i32 @main() nounwind { ++entry: ++ %s = alloca %struct.S, align 2 ++ call void @llvm.lifetime.start.p0(i64 128, ptr nonnull %s) ++ store <16 x i16> , ptr %s, align 2 ++ %0 = getelementptr inbounds [64 x i16], ptr %s, i64 0, i64 16 ++ store <16 x i16> , ptr %0, align 2 ++ %1 = getelementptr inbounds [64 x i16], ptr %s, i64 0, i64 32 ++ store <16 x i16> , ptr %1, align 2 ++ %2 = getelementptr inbounds [64 x i16], ptr %s, i64 0, i64 48 ++ store <16 x i16> , ptr %2, align 2 ++ call void @foo(ptr noundef nonnull %s) ++ store <16 x i16> , ptr %s, align 2 ++ %3 = getelementptr inbounds [64 x i16], ptr %s, i64 0, i64 16 ++ store <16 x i16> , ptr %3, align 2 ++ %4 = getelementptr inbounds [64 x i16], ptr %s, i64 0, i64 32 ++ store <16 x i16> , ptr %4, align 2 ++ %5 = getelementptr inbounds [64 x i16], ptr %s, i64 0, i64 48 ++ store <16 x i16> , ptr %5, align 2 ++ call void @bar(ptr noundef nonnull %s) ++ call void @llvm.lifetime.end.p0(i64 128, ptr nonnull %s) ++ ret i32 0 ++} ++ ++declare void @foo(ptr nocapture noundef) ++declare void @bar(ptr nocapture noundef) ++ ++declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) ++declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) +-- +2.20.1 + diff --git a/0042-LoongArch-Implement-LoongArchRegisterInfo-canRealign.patch b/0042-LoongArch-Implement-LoongArchRegisterInfo-canRealign.patch new file mode 100644 index 0000000..ff38b44 --- /dev/null +++ b/0042-LoongArch-Implement-LoongArchRegisterInfo-canRealign.patch @@ -0,0 +1,150 @@ +From 4f2d73c1506aa56fddea5ad41d2acea97e1b36e3 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Tue, 9 Jan 2024 20:35:49 +0800 +Subject: [PATCH 42/42] [LoongArch] Implement + LoongArchRegisterInfo::canRealignStack() (#76913) + +This patch fixes the crash issue in the test: +CodeGen/LoongArch/can-not-realign-stack.ll + +Register allocator may spill virtual registers to the stack, which +introduces stack alignment requirements (when the size of spilled + registers exceeds the default alignment size of the stack). If a +function does not have stack alignment requirements before register +allocation, registers used for stack alignment will not be preserved. + +Therefore, we should implement `canRealignStack()` to inform the +register allocator whether it is allowed to perform stack realignment +operations. 
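+
+The crash described above typically only reproduces in builds configured with
+expensive checks (e.g. -DLLVM_ENABLE_EXPENSIVE_CHECKS=ON, which also turns on
+the machine verifier), hence the `REQUIRES: expensive_checks` line in the test
+pre-committed by the previous patch. A reproduction sketch using that test,
+with the invocation taken from its RUN line:
+
+    llc --mtriple=loongarch64 --frame-pointer=none --mattr=+lasx \
+        llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll
+
+After this change the same invocation passes, and the updated test checks that
+no `bstrins.d $sp, $zero, n, 0` realignment instruction is emitted.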
+ +(cherry picked from commit 98c6aa72299caeff6b188e1ff2fc1b39c5b893b6) + +--- + .../LoongArch/LoongArchRegisterInfo.cpp | 23 ++++++++ + .../Target/LoongArch/LoongArchRegisterInfo.h | 1 + + .../LoongArch/can-not-realign-stack.ll | 56 +++++++++++++++++-- + 3 files changed, 75 insertions(+), 5 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp +index 257b947a3ce4..092b5f1fb442 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp +@@ -15,6 +15,7 @@ + #include "LoongArch.h" + #include "LoongArchInstrInfo.h" + #include "LoongArchSubtarget.h" ++#include "MCTargetDesc/LoongArchBaseInfo.h" + #include "MCTargetDesc/LoongArchMCTargetDesc.h" + #include "llvm/CodeGen/MachineFrameInfo.h" + #include "llvm/CodeGen/MachineFunction.h" +@@ -194,3 +195,25 @@ bool LoongArchRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed()); + return false; + } ++ ++bool LoongArchRegisterInfo::canRealignStack(const MachineFunction &MF) const { ++ if (!TargetRegisterInfo::canRealignStack(MF)) ++ return false; ++ ++ const MachineRegisterInfo *MRI = &MF.getRegInfo(); ++ const LoongArchFrameLowering *TFI = getFrameLowering(MF); ++ ++ // Stack realignment requires a frame pointer. If we already started ++ // register allocation with frame pointer elimination, it is too late now. ++ if (!MRI->canReserveReg(LoongArch::R22)) ++ return false; ++ ++ // We may also need a base pointer if there are dynamic allocas or stack ++ // pointer adjustments around calls. ++ if (TFI->hasReservedCallFrame(MF)) ++ return true; ++ ++ // A base pointer is required and allowed. Check that it isn't too late to ++ // reserve it. ++ return MRI->canReserveReg(LoongArchABI::getBPReg()); ++} +diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h +index 7e8f26b14097..d1e40254c297 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h ++++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h +@@ -51,6 +51,7 @@ struct LoongArchRegisterInfo : public LoongArchGenRegisterInfo { + bool requiresFrameIndexScavenging(const MachineFunction &MF) const override { + return true; + } ++ bool canRealignStack(const MachineFunction &MF) const override; + }; + } // end namespace llvm + +diff --git a/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll b/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll +index 526821076498..af24ae64b7c7 100644 +--- a/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll ++++ b/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll +@@ -1,14 +1,60 @@ +-; REQUIRES: expensive_checks +-; RUN: llc --mtriple=loongarch64 --frame-pointer=none --mattr=+lasx < %s ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --frame-pointer=none --mattr=+lasx < %s | FileCheck %s + +-; XFAIL: * ++;; This test is checking that when a function allows stack realignment and ++;; realignment needs were not detected before register allocation (at this ++;; point, fp is not preserved), but realignment is required during register ++;; allocation, the stack should not undergo realignment. + +-;; FIXME: This test will crash with expensive check. The subsequent patch will +-;; address and fix this issue. ++;; Ensure that the `bstrins.d $sp, $zero, n, 0` instruction is not generated. 
++;; n = log2(realign_size) - 1 + + %struct.S = type { [64 x i16] } + + define dso_local noundef signext i32 @main() nounwind { ++; CHECK-LABEL: main: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: addi.d $sp, $sp, -272 ++; CHECK-NEXT: st.d $ra, $sp, 264 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $fp, $sp, 256 # 8-byte Folded Spill ++; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0) ++; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI0_0) ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvst $xr0, $sp, 96 # 32-byte Folded Spill ++; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_1) ++; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI0_1) ++; CHECK-NEXT: xvld $xr1, $a0, 0 ++; CHECK-NEXT: xvst $xr1, $sp, 64 # 32-byte Folded Spill ++; CHECK-NEXT: xvst $xr1, $sp, 224 ++; CHECK-NEXT: xvst $xr0, $sp, 192 ++; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_2) ++; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI0_2) ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill ++; CHECK-NEXT: xvst $xr0, $sp, 160 ++; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_3) ++; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI0_3) ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvst $xr0, $sp, 0 # 32-byte Folded Spill ++; CHECK-NEXT: xvst $xr0, $sp, 128 ++; CHECK-NEXT: addi.d $fp, $sp, 128 ++; CHECK-NEXT: move $a0, $fp ++; CHECK-NEXT: bl %plt(foo) ++; CHECK-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload ++; CHECK-NEXT: xvst $xr0, $sp, 224 ++; CHECK-NEXT: xvld $xr0, $sp, 96 # 32-byte Folded Reload ++; CHECK-NEXT: xvst $xr0, $sp, 192 ++; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload ++; CHECK-NEXT: xvst $xr0, $sp, 160 ++; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload ++; CHECK-NEXT: xvst $xr0, $sp, 128 ++; CHECK-NEXT: move $a0, $fp ++; CHECK-NEXT: bl %plt(bar) ++; CHECK-NEXT: move $a0, $zero ++; CHECK-NEXT: ld.d $fp, $sp, 256 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 264 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 272 ++; CHECK-NEXT: ret + entry: + %s = alloca %struct.S, align 2 + call void @llvm.lifetime.start.p0(i64 128, ptr nonnull %s) +-- +2.20.1 + diff --git a/CVE-2023-46049.patch b/CVE-2023-46049.patch deleted file mode 100644 index fbbd3ec..0000000 --- a/CVE-2023-46049.patch +++ /dev/null @@ -1,34 +0,0 @@ -From c2515a8f2be5dd23354c9891f41ad104000f88c4 Mon Sep 17 00:00:00 2001 -From: Nikita Popov -Date: Tue, 26 Sep 2023 16:51:40 +0200 -Subject: [PATCH] [Bitcode] Add some missing GetTypeByID failure checks - -Print an error instead of crashing. - -Fixes https://github.com/llvm/llvm-project/issues/67388. 
---- - llvm/lib/Bitcode/Reader/MetadataLoader.cpp | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp -index b4b8690523244e..2b52b46a4ee5c4 100644 ---- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp -+++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp -@@ -1315,7 +1315,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( - - unsigned TyID = Record[0]; - Type *Ty = Callbacks.GetTypeByID(TyID); -- if (Ty->isMetadataTy() || Ty->isVoidTy()) { -+ if (!Ty || Ty->isMetadataTy() || Ty->isVoidTy()) { - dropRecord(); - break; - } -@@ -1366,7 +1366,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( - - unsigned TyID = Record[0]; - Type *Ty = Callbacks.GetTypeByID(TyID); -- if (Ty->isMetadataTy() || Ty->isVoidTy()) -+ if (!Ty || Ty->isMetadataTy() || Ty->isVoidTy()) - return error("Invalid record"); - - Value *V = ValueList.getValueFwdRef(Record[1], Ty, TyID, diff --git a/deprecated-recommonmark.patch b/deprecated-recommonmark.patch deleted file mode 100644 index b920e03..0000000 --- a/deprecated-recommonmark.patch +++ /dev/null @@ -1,25 +0,0 @@ ---- a/llvm/docs/conf.py 2024-09-02 15:08:42.392808121 +0800 -+++ b/llvm/docs/conf.py 2024-09-02 15:09:21.323150707 +0800 -@@ -36,21 +36,7 @@ - ".rst": "restructuredtext", - } - --try: -- import recommonmark --except ImportError: -- # manpages do not use any .md sources -- if not tags.has("builder-man"): -- raise --else: -- import sphinx -- -- if sphinx.version_info >= (3, 0): -- # This requires 0.5 or later. -- extensions.append("recommonmark") -- else: -- source_parsers = {".md": "recommonmark.parser.CommonMarkParser"} -- source_suffix[".md"] = "markdown" -+import sphinx - - # The encoding of source files. - # source_encoding = 'utf-8-sig' diff --git a/lit.lld-test.cfg.py b/lit.lld-test.cfg.py new file mode 100644 index 0000000..10d165b --- /dev/null +++ b/lit.lld-test.cfg.py @@ -0,0 +1,12 @@ +#Clear lld_tools_dir so we don't accidently pick up tools from somewhere else +config.lld_tools_dir = "" + +if hasattr(config, 'have_zlib'): + # Regression tests write output to this directory, so we need to be able to specify + # a temp directory when invoking lit. e.g. lit -Dlld_obj_root=/tmp/lit + config.lld_obj_root = "%(lld_obj_root)s" % lit_config.params + lit_config.load_config(config, '%(lld_test_root)s/lit.cfg.py' % lit_config.params) +else: + # For unit tests, llvm_obj_root is used to find the unit test binaries. + config.lld_obj_root = '%(lld_unittest_bindir)s' % lit_config.params + lit_config.load_config(config, '%(lld_test_root)s/Unit/lit.cfg.py' % lit_config.params) diff --git a/llvm.spec b/llvm.spec index 897d339..c711ae6 100644 --- a/llvm.spec +++ b/llvm.spec @@ -1,85 +1,398 @@ +##region globals +%global maj_ver 17 +%global min_ver 0 +%global patch_ver 6 + +##region components +%bcond_without check +%bcond_without python_lit +%bcond_without lldb +%bcond_without mlir +%bcond_without libcxx +# llvm <18 does not support isolated option to close EH support for flang +# so we disable flang building. +%bcond_with flang +# BOLT only supports aarch64 and x86_64 +%ifarch aarch64 x86_64 +%bcond_without build_bolt +%else +%bcond_with build_bolt +%endif +# llvm <18 does not support building polly shared libraries when building libLLVM.so +%bcond_with polly + +##endregion components + +# Disable LTO on x86 and riscv in order to reduce memory consumption. 
+%ifarch riscv64 +%bcond_with lto_build +%else +%bcond_without lto_build +%endif + +%if 0%{without lto_build} +%global _lto_cflags %nil +%endif + +%define gold_arches x86_64 aarch64 %{power64} +%ifarch %{gold_arches} + %bcond_without gold +%else + %bcond_with gold +%endif + +%bcond_without libedit + %global toolchain clang +%undefine _include_frame_pointers + +%global src_tarball_dir llvm-project-%{maj_ver}.%{min_ver}.%{patch_ver}.src + +%global has_crtobjs 1 + +%global build_ldflags %{?build_ldflags} -Wl,--build-id=sha1 + +##region LLVM globals +%global install_prefix %{_libdir}/llvm%{maj_ver} +%global install_bindir %{install_prefix}/bin +%global install_includedir %{install_prefix}/include +%global install_libdir %{install_prefix}/%{_lib} +%global install_datadir %{install_prefix}/share +%global install_mandir %{install_prefix}/share/man +%global install_libexecdir %{install_prefix}/libexec +%global build_libdir llvm/%{_vpath_builddir}/%{_lib} +%global unprefixed_libdir %{_lib} +%global targets_to_build "all" +%global experimental_targets_to_build "" +%global build_install_prefix %{buildroot}%{install_prefix} +%global llvm_triple %{_arch}-linux-gnu +%undefine _py3_shebang_P +##endregion LLVM globals + +##region COMPILER-RT globals +%global optflags %(echo %{optflags} -D_DEFAULT_SOURCE) +%global optflags %(echo %{optflags} -Dasm=__asm__) +##endregion COMPILER-RT globals + +##region openmp globals +%global so_suffix %{maj_ver}.%{min_ver} +%ifarch ppc64le +%global libomp_arch ppc64 +%else +%global libomp_arch %{_arch} +%endif +##endregion openmp globals + +##endregion globals + + + +##region packages +##region main package Name: llvm -Version: 17.0.6 -Release: 10%{?dist} -Summary: Low Level Virtual Machine, modular and reusable compiler and toolchain -License: Apache License v2.0 with LLVM Exceptions +Version: %{maj_ver}.%{min_ver}.%{patch_ver} +Release: 11%{?dist} +Summary: The Low Level Virtual Machine + +License: Apache-2.0 WITH LLVM-exception OR NCSA URL: http://llvm.org -Source0: https://github.com/llvm/llvm-project/releases/download/llvmorg-%{version}/llvm-%{version}.src.tar.xz -Source1: https://github.com/llvm/llvm-project/releases/download/llvmorg-%{version}/cmake-%{version}.src.tar.xz -Source2: https://github.com/llvm/llvm-project/releases/download/llvmorg-%{version}/third-party-%{version}.src.tar.xz - -# https://github.com/llvm/llvm-project/commit/c2515a8f2be5dd23354c9891f41ad104000f88c4 -Patch0001: CVE-2023-46049.patch - -# Backport patches to support `relax`, vector, some improvements and bugfixs on LoongArch -Patch0002: 0001-Backport-LoongArch-Add-relax-feature-and-keep-relocations.patch -Patch0003: 0002-Backport-LoongArch-Allow-delayed-decision-for-ADD-SUB-relocations.patch -Patch0004: 0003-Backport-LoongArch-Emit-R_LARCH_RELAX-when-expanding-some-LoadAddress.patch -Patch0005: 0004-Backport-MC-LoongArch-Add-AlignFragment-size-if-layout-is-available-and-not-need-insert-nops.patch -Patch0006: 0005-Backport-LoongArch-RISCV-Support-R_LARCH_-ADD-SUB-_ULEB128-R_RISCV_-SET-SUB-_ULEB128-for-uleb128-directives.patch -Patch0007: 0006-Backport-LoongArch-Add-relaxDwarfLineAddr-and-relaxDwarfCFA-to-handle-the-mutable-label-diff-in-dwarfinfo.patch -Patch0008: 0007-Backport-LoongArch-Insert-nops-and-emit-align-reloc-when-handle-alignment-directive.patch -Patch0009: 0008-Backport-test-Update-dwarf-loongarch-relocs.ll.patch -Patch0010: 0009-Backport-MC-test-Change-ELF-uleb-ehtable.s-Mach-O-to-use-private-symbols-in-.uleb128-for-label-differences.patch -Patch0011: 
0010-Backport-Mips-MC-AttemptToFoldSymbolOffsetDifference-revert-isMicroMips-special-case.patch -Patch0012: 0011-Backport-LoongArch-Add-the-support-for-vector-in-llvm17.patch -Patch0013: 0012-Backport-LoongArch-improve-the-support-for-compiler-rt-and-bugfix.patch -Patch0014: 0013-Backport-LoongArch-Improve-the-support-for-atomic-and-clear_cache.patch -Patch0015: 0014-Backport-LoongArch-fix-and-add-some-new-support.patch -Patch0016: 0015-LoongArch-Precommit-test-case-to-show-bug-in-LoongAr.patch -Patch0017: 0016-LoongArch-Pass-OptLevel-to-LoongArchDAGToDAGISel-cor.patch -Patch0018: 0017-LoongArch-Fix-test-cases-after-2dd8460d8a36.patch -Patch3000: deprecated-recommonmark.patch -Patch3001: 0001-Clear-instructions-not-recorded-in-ErasedInstrs.patch - -%define maj_ver %(echo %{version} | cut -d. -f1) -%define min_ver %(echo %{version} | cut -d. -f2) -%define patch_ver %(echo %{version} | cut -d. -f3) - -BuildRequires: gcc gcc-c++ clang cmake ninja-build -BuildRequires: zlib-devel libffi-devel ncurses-devel binutils-devel libedit-devel -BuildRequires: python3-sphinx -# for valgrind support -BuildRequires: valgrind-devel -# fo patchfix.py -BuildRequires: python3-devel - -Requires: %{name}-libs = %{version}-%{release} +Source0: https://github.com/llvm/llvm-project/releases/download/llvmorg-%{version}/llvm-project-%{version}.src.tar.xz +# LLD test files +Source1331: run-lit-tests +Source1332: lit.lld-test.cfg.py +#end LLD test files + +## common patch, start from 0001 to 0299 +## Architecture patch,start from 0300 to 0999 +##region CLANG patches +Patch0001: 0001-PATCH-clang-Make-funwind-tables-the-default-on-all-a.patch +Patch0002: 0003-PATCH-clang-Don-t-install-static-libraries.patch +Patch0003: 0001-Workaround-a-bug-in-ORC-on-ppc64le.patch +Patch0004: 0001-Driver-Give-devtoolset-path-precedence-over-Installe.patch +Patch0005: 0001-clang-shlib-Add-symbol-versioning-to-all-symbols.patch +##endregion CLANG patches + +##region OpenMP patches +Patch0006: 0001-openmp-Add-option-to-disable-tsan-tests-111548.patch +#Patch1901: 0001-openmp-Use-core_siblings_list-if-physical_package_id.patch +##endregion OpenMP patches + +##region LLD patches +Patch0007: 0001-18-Always-build-shared-libs-for-LLD.patch +##endregion LLD patches + +Patch0008: 0001-profile-Use-base-vaddr-for-__llvm_write_binary_ids-n.patch + +%if %{with flang} +##region flang pathes +Patch0009: 0001-flang-Remove-the-dependency-on-Bye.patch +##endregion flang patches +%endif + +# triple implement for loongarch +Patch0300: 0001-LoongArch-Add-support-for-OpenCloudOS-triple.patch +# relax implement for loongarch and riscv +Patch0301: 0001-lld-LoongArch-Support-the-R_LARCH_-ADD-SUB-6-relocat.patch +Patch0302: 0002-LoongArch-Add-relax-feature-and-keep-relocations-721.patch +Patch0303: 0003-LoongArch-Allow-delayed-decision-for-ADD-SUB-relocat.patch +Patch0304: 0004-LoongArch-Emit-R_LARCH_RELAX-when-expanding-some-Loa.patch +Patch0305: 0005-MC-LoongArch-Add-AlignFragment-size-if-layout-is-ava.patch +Patch0306: 0006-LoongArch-RISCV-Support-R_LARCH_-ADD-SUB-_ULEB128-R_.patch +Patch0307: 0007-LoongArch-Add-relaxDwarfLineAddr-and-relaxDwarfCFA-t.patch +Patch0308: 0008-LoongArch-Insert-nops-and-emit-align-reloc-when-hand.patch +Patch0309: 0009-ELF-RISCV-Implement-emit-relocs-with-relaxation.patch +Patch0310: 0010-lld-ELF-Support-relax-R_LARCH_ALIGN-78692.patch +Patch0311: 0011-test-Update-dwarf-loongarch-relocs.ll.patch +Patch0312: 0012-lld-LoongArch-Support-the-R_LARCH_-ADD-SUB-_ULEB128-.patch +Patch0313: 
0013-MC-test-Change-ELF-uleb-ehtable.s-Mach-O-to-use-priv.patch +Patch0314: 0014-Mips-MC-AttemptToFoldSymbolOffsetDifference-revert-i.patch +# vector implement for loongarch +Patch0315: 0001-Clang-LoongArch-Use-the-ClangBuiltin-class-to-automa.patch +Patch0316: 0002-LoongArch-Add-LSX-intrinsic-support.patch +Patch0317: 0003-LoongArch-Add-LASX-intrinsic-support.patch +Patch0318: 0004-LoongArch-Add-LSX-intrinsic-testcases.patch +Patch0319: 0005-LoongArch-Add-LASX-intrinsic-testcases.patch +Patch0320: 0006-LoongArch-Add-testcases-of-LASX-intrinsics-with-imme.patch +Patch0321: 0007-LoongArch-MC-Add-invalid-immediate-testcases-for-LSX.patch +Patch0322: 0008-LoongArch-MC-Add-invalid-immediate-testcases-for-LAS.patch +Patch0323: 0009-LoongArch-Add-testcases-of-LSX-intrinsics-with-immed.patch +Patch0324: 0010-LoongArch-Reorder-LoongArchTargetLowering-.-NFC.patch +Patch0325: 0011-LoongArch-Fix-typos.-NFC.patch +Patch0326: 0012-LoongArch-Set-some-operations-action-for-LSX-and-LAS.patch +Patch0327: 0013-Clang-LoongArch-Support-compiler-options-mlsx-mlasx-.patch +Patch0328: 0014-Clang-LoongArch-Add-ABI-implementation-of-passing-ve.patch +Patch0329: 0015-Clang-LoongArch-Support-the-builtin-functions-for-LS.patch +Patch0330: 0016-Clang-LoongArch-Support-the-builtin-functions-for-LA.patch +Patch0331: 0017-LoongArch-CodeGen-Add-LSX-builtin-testcases.patch +Patch0332: 0018-LoongArch-CodeGen-Add-LASX-builtin-testcases.patch +Patch0333: 0019-LoongArch-Add-codegen-support-for-extractelement-737.patch +Patch0334: 0020-LoongArch-Add-some-binary-IR-instructions-testcases-.patch +Patch0335: 0021-LoongArch-Add-codegen-support-for-insertelement.patch +Patch0336: 0022-LoongArch-Custom-lowering-ISD-BUILD_VECTOR.patch +Patch0337: 0023-LoongArch-Add-more-and-or-xor-patterns-for-vector-ty.patch +Patch0338: 0024-LoongArch-Add-some-binary-IR-instructions-testcases-.patch +Patch0339: 0025-LoongArch-Override-TargetLowering-isShuffleMaskLegal.patch +Patch0340: 0026-Reland-LoongArch-Support-CTLZ-with-lsx-lasx.patch +Patch0341: 0027-LoongArch-Support-MULHS-MULHU-with-lsx-lasx.patch +Patch0342: 0028-LoongArch-Make-ISD-VSELECT-a-legal-operation-with-ls.patch +Patch0343: 0029-LoongArch-Add-codegen-support-for-icmp-fcmp-with-lsx.patch +Patch0344: 0030-LoongArch-Make-ISD-FSQRT-a-legal-operation-with-lsx-.patch +Patch0345: 0031-LoongArch-Mark-ISD-FNEG-as-legal.patch +Patch0346: 0032-LoongArch-Add-codegen-support-for-X-VF-MSUB-NMADD-NM.patch +Patch0347: 0033-LoongArch-Fix-LASX-vector_extract-codegen.patch +Patch0348: 0034-LoongArch-Fix-incorrect-pattern-XVREPL128VEI_-W-D-in.patch +Patch0349: 0035-LoongArch-Fix-incorrect-pattern-X-VBITSELI_B-instruc.patch +Patch0350: 0036-Clang-LoongArch-Do-not-pass-vector-arguments-via-vec.patch +Patch0351: 0037-LoongArch-Permit-auto-vectorization-using-LSX-LASX-w.patch +Patch0352: 0038-CodeGen-LoongArch-Set-SINT_TO_FP-UINT_TO_FP-to-legal.patch +Patch0353: 0039-CodeGen-LoongArch-Set-FP_TO_SINT-FP_TO_UINT-to-legal.patch +Patch0354: 0040-LoongArch-Fixing-the-incorrect-return-value-of-Loong.patch +Patch0355: 0041-LoongArch-Pre-commit-test-for-76913.-NFC.patch +Patch0356: 0042-LoongArch-Implement-LoongArchRegisterInfo-canRealign.patch +# compiler-rt bugfix for loongarch +Patch0357: 0001-sanitizer-msan-VarArgHelper-for-loongarch64.patch +Patch0358: 0002-Driver-Support-fsanitize-cfi-icall-on-loongarch64-67.patch +Patch0359: 0003-LowerTypeTests-Add-loongarch64-to-CFI-jumptables-673.patch +Patch0360: 0004-CFI-Allow-LoongArch-67314.patch +Patch0361: 
0005-test-compiler-rt-Mark-several-tests-as-UNSUPPORTED-o.patch +Patch0362: 0006-tsan-Add-support-for-linux-loongarch64-in-lib-tsan-g.patch +Patch0363: 0007-tsan-Refine-fstat-64-interceptors-86625.patch +Patch0364: 0008-Clang-LoongArch-Generate-_mcount-instead-of-mcount-6.patch +Patch0365: 0009-LoongArch-Fix-td-pattern-for-CACOP-LDPTE-and-LDDIR.patch +Patch0366: 0010-LoongArch-test-Add-some-ABI-regression-tests-for-emp.patch +Patch0367: 0011-LoongArch-Fix-ABI-mismatch-with-gcc-g-about-empty-st.patch +Patch0368: 0012-LoongArch-Pre-commit-test-for-issue-70890.patch +Patch0369: 0013-LoongArch-Fix-ABI-mismatch-with-g-when-handling-empt.patch +Patch0370: 0014-Driver-Default-LoongArch-to-fno-direct-access-extern.patch +Patch0371: 0015-LoongArch-MC-Refine-MCInstrAnalysis-based-on-registe.patch +Patch0372: 0016-LoongArch-NFC-Pre-commit-MCInstrAnalysis-tests-for-i.patch +Patch0373: 0017-LoongArch-Set-isBarrier-to-true-for-instruction-b-72.patch +Patch0374: 0018-LoongArch-MC-Pre-commit-tests-for-instr-bl-fixupkind.patch +Patch0375: 0019-LoongArch-MC-Support-to-get-the-FixupKind-for-BL-729.patch +Patch0376: 0020-LoongArch-MC-Modify-branch-evaluation-for-MCInstrAna.patch +Patch0377: 0021-LoongArch-Precommit-a-test-for-smul-with-overflow-NF.patch +Patch0378: 0022-LoongArch-Disable-mulodi4-and-muloti4-libcalls-73199.patch +Patch0379: 0023-LoongArch-Fix-pattern-for-FNMSUB_-S-D-instructions-7.patch +Patch0380: 0024-LoongArch-Fix-the-procossor-series-mask.patch +Patch0381: 0025-LoongArch-Make-sure-that-the-LoongArchISD-BSTRINS-no.patch +Patch0382: 0026-Clang-LoongArch-Precommit-test-for-fix-wrong-return-.patch +Patch0383: 0027-Clang-LoongArch-Fix-wrong-return-value-type-of-__ioc.patch +# loongarch improve +Patch0384: 0001-LoongArch-Improve-codegen-for-atomic-ops-67391.patch +Patch0385: 0002-LoongArch-Add-some-atomic-tests-68766.patch +Patch0386: 0003-LoongArch-Support-finer-grained-DBAR-hints-for-LA664.patch +Patch0387: 0004-LoongArch-Precommit-a-test-for-atomic-cmpxchg-optmiz.patch +Patch0388: 0005-LoongArch-Improve-codegen-for-atomic-cmpxchg-ops-693.patch +Patch0389: 0006-LoongArch-Override-LoongArchTargetLowering-getExtend.patch +Patch0390: 0007-Memory-Call-__clear_cache-in-InvalidateInstructionCa.patch +# loongarch call36 v1.1 +Patch0391: 0001-Driver-Support-mcmodel-for-LoongArch-72514.patch +Patch0392: 0002-BinaryFormat-LoongArch-Define-psABI-v2.20-relocs-for.patch +Patch0393: 0003-lld-LoongArch-Support-the-R_LARCH_CALL36-relocation-.patch +Patch0394: 0004-lld-test-LoongArch-Remove-the-test-for-R_LARCH_CALL3.patch +Patch0395: 0005-Revert-lld-test-LoongArch-Remove-the-test-for-R_LARC.patch +Patch0396: 0006-LoongArch-Emit-function-call-code-sequence-as-PCADDU.patch +Patch0397: 0007-LoongArch-Pre-commit-test-for-76555.-NFC.patch +Patch0398: 0008-LoongArch-test-Remove-the-FIXME-in-psabi-restricted-.patch +Patch0399: 0009-LoongArch-Reimplement-the-expansion-of-PseudoLA-_LAR.patch +Patch0400: 0010-LoongArch-Fix-Wunused-variable-in-LoongArchExpandPse.patch +Patch0401: 0011-lld-ELF-Add-a-corner-testcase-for-elf-getLoongArchPa.patch +Patch0402: 0012-lld-LoongArch-Add-a-another-corner-testcase-for-elf-.patch +Patch0403: 0013-lld-LoongArch-Handle-extreme-code-model-relocs-accor.patch +Patch0404: 0014-LoongArch-clang-Add-support-for-option-msimd-and-mac.patch +Patch0405: 0015-LoongArch-clang-Modify-loongarch-msimd.c-to-avoid-gr.patch +Patch0406: 0016-LoongArch-CodeGen-Implement-128-bit-and-256-bit-vect.patch +Patch0407: 0017-LoongArch-Enable-128-bits-vector-by-default-100056.patch +Patch0408: 
0018-LoongArch-Add-LoongArch-V1.1-instructions-definition.patch +Patch0409: 0019-LoongArch-Add-definitions-and-feature-frecipe-for-FP.patch +Patch0410: 0020-LoongArch-Support-march-la64v1.0-and-march-la64v1.1-.patch +Patch0411: 0021-LoongArch-Support-la664-100068.patch +Patch0412: 0022-LoongArch-Fix-test-issue-of-init-loongarch.c.patch +Patch0413: 0023-LoongArch-Remove-experimental-auto-vec-feature.-1000.patch + +BuildRequires: gcc +BuildRequires: gcc-c++ +BuildRequires: clang +BuildRequires: cmake +BuildRequires: ccache +BuildRequires: chrpath +BuildRequires: ninja-build +BuildRequires: zlib-devel +BuildRequires: libzstd-devel +BuildRequires: libffi-devel +BuildRequires: ncurses-devel + +%if %{with flang} +BuildRequires: python3-recommonmark +%endif + +BuildRequires: python3-sphinx +BuildRequires: python3-psutil +BuildRequires: python3-pexpect +BuildRequires: python3-myst-parser +%if %{with gold} +BuildRequires: binutils-devel +BuildRequires: binutils-gold +%endif +%ifarch %{valgrind_arches} +BuildRequires: valgrind-devel +%endif +%if %{with libedit} +BuildRequires: libedit-devel +%endif +# We need python3-devel for %%py3_shebang_fix +BuildRequires: python3-devel +BuildRequires: python3-setuptools +BuildRequires: swig +BuildRequires: libxml2-devel +BuildRequires: doxygen + +# For clang-offload-packager +BuildRequires: elfutils-libelf-devel +BuildRequires: perl +BuildRequires: perl-Data-Dumper +BuildRequires: perl-Encode +BuildRequires: libffi-devel +BuildRequires: perl-generators +BuildRequires: emacs +BuildRequires: libatomic + +# scan-build uses these perl modules so they need to be installed in order +# to run the tests. +BuildRequires: perl(Digest::MD5) +BuildRequires: perl(File::Copy) +BuildRequires: perl(File::Find) +BuildRequires: perl(File::Path) +BuildRequires: perl(File::Temp) +BuildRequires: perl(FindBin) +BuildRequires: perl(Hash::Util) +BuildRequires: perl(lib) +BuildRequires: perl(Term::ANSIColor) +BuildRequires: perl(Text::ParseWords) +BuildRequires: perl(Sys::Hostname) + +%if %{with mlir} +BuildRequires: python3-lit +BuildRequires: python3-devel +BuildRequires: python3-numpy +BuildRequires: python3-pybind11 +BuildRequires: python3-pyyaml +%endif + +BuildRequires: graphviz + +# This is required because we need "ps" when running LLDB tests +BuildRequires: procps-ng +BuildRequires: /usr/bin/marshalparser +%global py_reproducible_pyc_path %{buildroot}%{python3_sitelib} +Requires: llvm-libs = %{version}-%{release} Provides: llvm(major) = %{maj_ver} %description -The LLVM Project is a collection of modular and reusable compiler -and toolchain technologies, including llvm, clang, lldb, compiler-rt, -and so on.The LLVM Core libraries provide a modern source- and -target-independent optimizer, along with code generation support for -many popular CPUs. These libraries are built around a well specified -code representation known as the LLVM intermediate representation ("LLVM IR"). +LLVM is a compiler infrastructure designed for compile-time, link-time, +runtime, and idle-time optimization of programs from arbitrary programming +languages. The compiler infrastructure includes mirror sets of programming +tools as well as libraries with equivalent functionality. +##endregion main package + + +##region LLVM lit package +%if %{with python_lit} +%package -n python3-lit +Summary: LLVM lit test runner for Python 3 +BuildArch: noarch +Recommends: python3-psutil + +%description -n python3-lit +lit is a tool used by the LLVM project for executing its test suites. 
+%endif +##endregion LLVM lit package + -Documentation is https://llvm.org/docs/. +##region LLVM packages +%package -n llvm-filesystem +Summary: Filesystem package that owns the versioned llvm prefix +%description -n llvm-filesystem +This packages owns the versioned llvm prefix directory: $libdir/llvm$version -%package devel +%package -n llvm-devel Summary: Libraries and header files for LLVM -Requires: %{name} = %{version}-%{release} -Requires: %{name}-libs = %{version}-%{release} -# for -ledit to the linker flags +Requires: llvm = %{version}-%{release} +Requires: llvm-libs = %{version}-%{release} +# The installed LLVM cmake files will add -ledit to the linker flags for any +# app that requires the libLLVMLineEditor, so we need to make sure +# libedit-devel is available. +%if %{with libedit} Requires: libedit-devel -Requires: %{name}-test = %{version}-%{release} +%endif +Requires: libzstd-devel +Requires: llvm-static = %{version}-%{release} +Requires: llvm-test = %{version}-%{release} +Requires: llvm-googletest = %{version}-%{release} +Requires(post): alternatives +Requires(postun): alternatives Provides: llvm-devel(major) = %{maj_ver} -Provides: llvm-static(major) = %{maj_ver} -Provides: llvm-static - -%description devel -Library and header files to develop programs using the LLVM infrastructure. +%description -n llvm-devel +This package contains library and header files needed to develop new native +programs that use the LLVM infrastructure. -%package libs +%package -n llvm-libs Summary: LLVM shared libraries +Requires: llvm-filesystem = %{version}-%{release} + +%description -n llvm-libs +Shared libraries for the LLVM compiler infrastructure. + +%package -n llvm-static +Summary: LLVM static libraries +Requires: llvm-filesystem = %{version}-%{release} +Provides: llvm-static(major) = %{maj_ver} -%description libs -Shared libraries for LLVM. +%description -n llvm-static +Static libraries for the LLVM compiler infrastructure. %package cmake-utils Summary: CMake utilities shared across LLVM subprojects @@ -88,157 +401,1295 @@ Summary: CMake utilities shared across LLVM subprojects CMake utilities shared across LLVM subprojects. This is for internal use by LLVM packages only. -%package test +%package -n llvm-test Summary: LLVM regression tests -Requires: %{name} = %{version}-%{release} -Requires: %{name}-libs = %{version}-%{release} +Requires: llvm = %{version}-%{release} +Requires: llvm-libs = %{version}-%{release} Provides: llvm-test(major) = %{maj_ver} -Provides: llvm-googletest -%description test -LLVM regression tests adn LLVM's modified googletest sources. +%description -n llvm-test +LLVM regression tests. + +%package -n llvm-googletest +Requires: llvm-filesystem = %{version}-%{release} +Summary: LLVM's modified googletest sources + +%description -n llvm-googletest +LLVM's modified googletest sources. +##endregion LLVM packages + + +##region CLANG packages +%package -n clang +Summary: A C language family front-end for LLVM +Requires: clang-libs = %{version}-%{release} +Requires: libstdc++-devel +Requires: gcc-c++ +Provides: clang(major) = %{maj_ver} + +%description -n clang +clang: noun + 1. A loud, resonant, metallic sound. + 2. The strident call of a crane or goose. + 3. C-language family front-end toolkit. + +The goal of the Clang project is to create a new C, C++, Objective C +and Objective C++ front-end for the LLVM compiler. Its tools are built +as libraries and designed to be loosely-coupled and extensible. 
+ +Install compiler-rt if you want the Blocks C language extension or to +enable sanitization and profiling options when building, and +libomp-devel to enable -fopenmp. + +%package -n clang-libs +Summary: Runtime library for clang +Requires: clang-resource-filesystem = %{version}-%{release} +Recommends: compiler-rt = %{version}-%{release} +Requires: llvm-libs = %{version}-%{release} +Recommends: libatomic +# libomp-devel is required, so clang can find the omp.h header when compiling +# with -fopenmp. +Recommends: libomp-devel = %{version}-%{release} +Recommends: libomp = %{version}-%{release} + +%description -n clang-libs +Runtime library for clang. + +%package -n clang-devel +Summary: Development header files for clang +Requires: clang-libs = %{version}-%{release} +Requires: clang = %{version}-%{release} +Requires: clang-tools-extra = %{version}-%{release} +Requires: llvm-devel = %{version}-%{release} +Provides: clang-devel(major) = %{maj_ver} +Provides: clangd = %{version}-%{release} + +%description -n clang-devel +Development header files for clang. + +%package -n clang-resource-filesystem +Summary: Filesystem package that owns the clang resource directory +Provides: clang-resource-filesystem(major) = %{maj_ver} + +%description -n clang-resource-filesystem +This package owns the clang resouce directory: $libdir/clang/$version/ + +%package -n clang-analyzer +Summary: A source code analysis framework +License: Apache-2.0 WITH LLVM-exception OR NCSA OR MIT +Requires: clang = %{version}-%{release} + +%description -n clang-analyzer +The Clang Static Analyzer consists of both a source code analysis +framework and a standalone tool that finds bugs in C and Objective-C +programs. The standalone tool is invoked from the command-line, and is +intended to run in tandem with a build of a project or code base. + +%package -n clang-tools-extra +Summary: Extra tools for clang +Requires: clang-libs = %{version}-%{release} +Requires: emacs-filesystem + +%description -n clang-tools-extra +A set of extra tools built using Clang's tooling API. + +%package -n clang-tools-extra-devel +Summary: Development header files for clang tools +Requires: clang-tools-extra = %{version}-%{release} + +%description -n clang-tools-extra-devel +Development header files for clang tools. + +%package -n git-clang-format +Summary: Integration of clang-format for git +Requires: clang-tools-extra = %{version}-%{release} +Requires: git +Requires: python3 + +%description -n git-clang-format +clang-format integration for git. + +%package -n python3-clang +Summary: Python3 bindings for clang +Requires: clang-devel = %{version}-%{release} +Requires: python3 + +%description -n python3-clang +Python3 bindings for clang. +##endregion CLANG packages + + +##region COMPILER-RT packages +%package -n compiler-rt +Summary: LLVM "compiler-rt" runtime libraries +License: Apache-2.0 WITH LLVM-exception OR NCSA OR MIT +Requires: clang-resource-filesystem = %{version}-%{release} +Provides: compiler-rt(major) = %{maj_ver} + +%description -n compiler-rt +The compiler-rt project is a part of the LLVM project. It provides +implementation of the low-level target-specific hooks required by +code generation, sanitizer runtimes and profiling library for code +instrumentation, and Blocks C language extension. 
+##endregion COMPILER-RT packages + + +##region OPENMP packages +%package -n libomp +Summary: OpenMP runtime for clang +URL: http://openmp.llvm.org +Requires: llvm-libs = %{version}-%{release} +Requires: elfutils-libelf +Provides: libomp(major) = %{maj_ver} + +%description -n libomp +OpenMP runtime for clang. + +%package -n libomp-devel +Summary: OpenMP header files +URL: http://openmp.llvm.org +Requires: libomp = %{version}-%{release} +Requires: clang-resource-filesystem = %{version}-%{release} +Provides: libomp-devel(major) = %{maj_ver} +%description -n libomp-devel +OpenMP header files. +URL: http://openmp.llvm.org +##endregion OPENMP packages + +##region LLD packages +%package -n lld +Summary: The LLVM Linker +Requires(post): alternatives +Requires(preun): alternatives +Requires: lld-libs = %{version}-%{release} +Provides: lld(major) = %{maj_ver} + +%description -n lld +The LLVM project linker. + +%package -n lld-devel +Summary: Libraries and header files for LLD +Requires: lld-libs = %{version}-%{release} +Requires: lld = %{version}-%{release} +Provides: lld-devel(major) = %{maj_ver} + +%description -n lld-devel +This package contains library and header files needed to develop new native +programs that use the LLD infrastructure. + +%package -n lld-libs +Summary: LLD shared libraries +Requires: llvm-libs = %{version}-%{release} + +%description -n lld-libs +Shared libraries for LLD. + +%package -n lld-test +Summary: LLD regression tests +Requires: lld = %{version}-%{release} +Requires: python3-lit +Requires: llvm-test(major) = %{maj_ver} +Requires: lld-libs = %{version}-%{release} + +%description -n lld-test +LLVM regression tests. +##endregion LLD packages + +##region LLDB packages +%if %{with lldb} +%package -n lldb +Summary: Next generation high-performance debugger +License: Apache-2.0 WITH LLVM-exception OR NCSA +URL: http://lldb.llvm.org/ +Requires: clang-libs = %{version}-%{release} +Requires: python3-lldb + +%description -n lldb +LLDB is a next generation, high-performance debugger. It is built as a set +of reusable components which highly leverage existing libraries in the +larger LLVM Project, such as the Clang expression parser and LLVM +disassembler. + +%package -n lldb-devel +Summary: Development header files for LLDB +Requires: lldb = %{version}-%{release} + +%description -n lldb-devel +The package contains header files for the LLDB debugger. + +%package -n python3-lldb +Summary: Python module for LLDB +Requires: lldb = %{version}-%{release} + +%description -n python3-lldb +The package contains the LLDB Python module. +%endif +##endregion LLDB packages + +##region MLIR packages +%if %{with mlir} +%package -n mlir +Summary: Multi-Level Intermediate Representation Overview +License: Apache-2.0 WITH LLVM-exception +URL: http://mlir.llvm.org +Requires: llvm-libs = %{version}-%{release} + +%description -n mlir +The MLIR project is a novel approach to building reusable and extensible +compiler infrastructure. MLIR aims to address software fragmentation, +improve compilation for heterogeneous hardware, significantly reduce +the cost of building domain specific compilers, and aid in connecting +existing compilers together. + +%package -n mlir-static +Summary: MLIR static files +Requires: mlir = %{version}-%{release} + +%description -n mlir-static +MLIR static files. + +%package -n mlir-devel +Summary: MLIR development files +Requires: mlir = %{version}-%{release} +Requires: mlir-static = %{version}-%{release} + +%description -n mlir-devel +MLIR development files. 
+ +%package -n python3-mlir +%{?python_provide:%python_provide python3-mlir} +Summary: MLIR python bindings + +Requires: python3 +Requires: python3-numpy + +%description -n python3-mlir +MLIR python bindings. +%endif +##endregion MLIR packages + + +##region flang packages +%if %{with flang} +%package -n flang +Summary: Fortran language front-end designed for integration with LLVM +License: Apache-2.0 WITH LLVM-exception +URL: https://flang.llvm.org + +%description -n flang +Flang is a ground-up implementation of a Fortran front end written in modern +C++. + +%package -n flang-devel +Summary: flang development files +Requires: flang = %{version}-%{release} + +%description -n flang-devel +flang development files. +%endif +##endregion flang packages + + +##region libcxx packages +%if %{with libcxx} +%package -n libcxx +Summary: C++ standard library targeting C++11 +License: Apache-2.0 WITH LLVM-exception OR MIT OR NCSA +URL: http://libcxx.llvm.org/ +Requires: libcxxabi = %{version}-%{release} + +%description -n libcxx +libc++ is a new implementation of the C++ standard library, targeting C++11 and above. + +%package -n libcxx-devel +Summary: Headers and libraries for libcxx devel +Requires: libcxx = %{version}-%{release} +Requires: libcxxabi-devel + +%description -n libcxx-devel +Headers and libraries for libcxx devel. + +%package -n libcxx-static +Summary: Static libraries for libcxx + +%description -n libcxx-static +Static libraries for libcxx. + +%package -n libcxxabi +Summary: Low level support for a standard C++ library + +%description -n libcxxabi +libcxxabi provides low level support for a standard C++ library. + +%package -n libcxxabi-devel +Summary: Headers and libraries for libcxxabi devel +Requires: libcxxabi = %{version}-%{release} + +%description -n libcxxabi-devel +Headers and libraries for libcxxabi devel. + +%package -n libcxxabi-static +Summary: Static libraries for libcxxabi + +%description -n libcxxabi-static +Static libraries for libcxxabi. + +%package -n llvm-libunwind +Summary: LLVM libunwind + +%description -n llvm-libunwind +LLVM libunwind is an implementation of the interface defined by the HP libunwind +project. It was contributed Apple as a way to enable clang++ to port to +platforms that do not have a system unwinder. It is intended to be a small and +fast implementation of the ABI, leaving off some features of HP's libunwind +that never materialized (e.g. remote unwinding). + +%package -n llvm-libunwind-devel +Summary: LLVM libunwind development files +Provides: llvm-libunwind(major) = %{maj_ver} +Requires: llvm-libunwind = %{version}-%{release} + +%description -n llvm-libunwind-devel +Unversioned shared library for LLVM libunwind + +%package -n llvm-libunwind-static +Summary: Static library for LLVM libunwind + +%description -n llvm-libunwind-static +Static library for LLVM libunwind. +%endif +##endregion libcxx packages + + +##region BOLT packages +%if %{with build_bolt} +%package -n llvm-bolt +Summary: A post-link optimizer developed to speed up large applications +License: Apache-2.0 WITH LLVM-exception +URL: https://github.com/llvm/llvm-project/tree/main/bolt +Requires: llvm-filesystem = %{version}-%{release} +Recommends: gperftools-devel + +%description -n llvm-bolt + +BOLT is a post-link optimizer developed to speed up large applications. +It achieves the improvements by optimizing application's code layout based on +execution profile gathered by sampling profiler, such as Linux `perf` tool. 
+%endif +##endregion BOLT packages + + +##region polly packages +%if %{with polly} +%package -n polly +Summary: LLVM Framework for High-Level Loop and Data-Locality Optimizations +License: Apache-2.0 WITH LLVM-exception +URL: http://polly.llvm.org +Requires: llvm-libs = %{version}-%{release} + +%description -n polly +Polly is a high-level loop and data-locality optimizer and optimization +infrastructure for LLVM. It uses an abstract mathematical representation based +on integer polyhedron to analyze and optimize the memory access pattern of a +program. + +%package -n polly-devel +Summary: Polly header files +Requires: polly = %{version}-%{release} + +%description -n polly-devel +Polly header files. +%endif +##endregion polly packages + +##endregion packages + %prep -%setup -T -q -b 1 -n cmake-%{version}.src -cd .. -mv cmake-%{version}.src cmake +%autosetup -p1 -n llvm-project-%{version}.src + +%py3_shebang_fix \ + llvm/test/BugPoint/compile-custom.ll.py \ + llvm/tools/opt-viewer/*.py \ + llvm/utils/update_cc_test_checks.py + +%py3_shebang_fix \ + clang-tools-extra/clang-tidy/tool/ \ + clang-tools-extra/clang-include-fixer/find-all-symbols/tool/run-find-all-symbols.py + +%py3_shebang_fix \ + clang/tools/clang-format/ \ + clang/tools/clang-format/git-clang-format \ + clang/utils/hmaptool/hmaptool \ + clang/tools/scan-view/bin/scan-view \ + clang/tools/scan-view/share/Reporter.py \ + clang/tools/scan-view/share/startfile.py \ + clang/tools/scan-build-py/bin/* \ + clang/tools/scan-build-py/libexec/* + +%py3_shebang_fix compiler-rt/lib/hwasan/scripts/hwasan_symbolize -%setup -T -q -b 2 -n third-party-%{version}.src -cd .. -mv third-party-%{version}.src third-party +%if %{with libcxx} +%py3_shebang_fix libcxx/utils/ +%endif -%setup -T -q -b 0 -n llvm-%{version}.src -%autopatch -p2 -pathfix.py -i %{__python3} -p -n -k -as \ - test/BugPoint/compile-custom.ll.py \ - tools/opt-viewer/*.py \ - utils/update_cc_test_checks.py %build -%global _lto_cflags -flto=thin - -%cmake -G Ninja \ - -DCMAKE_C_FLAGS="-gdwarf-4" \ - -DCMAKE_CXX_FLAGS="-gdwarf-4" \ - -DBUILD_SHARED_LIBS:BOOL=OFF \ - -DLLVM_PARALLEL_LINK_JOBS=1 \ - -DCMAKE_BUILD_TYPE=RelWithDebInfo \ - -DCMAKE_SKIP_RPATH:BOOL=ON \ - -DLLVM_LIBDIR_SUFFIX=64 \ - -DLLVM_TARGETS_TO_BUILD="all" \ - -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD='' \ - -DLLVM_ENABLE_LIBCXX:BOOL=OFF \ - -DLLVM_ENABLE_ZLIB:BOOL=ON \ - -DLLVM_ENABLE_FFI:BOOL=ON \ - -DLLVM_ENABLE_RTTI:BOOL=ON \ - -DLLVM_USE_PERF:BOOL=ON \ - -DLLVM_BINUTILS_INCDIR=%{_includedir} \ - -DLLVM_BUILD_RUNTIME:BOOL=ON \ - -DLLVM_INCLUDE_TOOLS:BOOL=ON \ - -DLLVM_BUILD_TOOLS:BOOL=ON \ - -DLLVM_INCLUDE_TESTS:BOOL=ON \ - -DLLVM_BUILD_TESTS:BOOL=ON \ - -DLLVM_INSTALL_GTEST:BOOL=ON \ - -DLLVM_LIT_ARGS=-v \ - -DLLVM_INCLUDE_EXAMPLES:BOOL=ON \ - -DLLVM_BUILD_EXAMPLES:BOOL=OFF \ - -DLLVM_INCLUDE_UTILS:BOOL=ON \ - -DLLVM_INSTALL_UTILS:BOOL=ON \ - -DLLVM_UTILS_INSTALL_DIR:PATH=%{_bindir} \ - -DLLVM_TOOLS_INSTALL_DIR:PATH=bin \ - -DLLVM_INCLUDE_DOCS:BOOL=ON \ - -DLLVM_BUILD_DOCS:BOOL=ON \ - -DLLVM_ENABLE_SPHINX:BOOL=ON \ - -DLLVM_ENABLE_DOXYGEN:BOOL=OFF \ - -DLLVM_VERSION_SUFFIX='' \ - -DLLVM_BUILD_LLVM_DYLIB:BOOL=ON \ - -DLLVM_LINK_LLVM_DYLIB:BOOL=ON \ - -DLLVM_BUILD_EXTERNAL_COMPILER_RT:BOOL=ON \ - -DLLVM_INSTALL_TOOLCHAIN_ONLY:BOOL=OFF \ - -DLLVM_DEFAULT_TARGET_TRIPLE=%{_host} \ - -DSPHINX_WARNINGS_AS_ERRORS=OFF \ - -DCMAKE_INSTALL_PREFIX=/usr \ - -DLLVM_INSTALL_SPHINX_HTML_DIR=%{_pkgdocdir}/html \ - -DSPHINX_EXECUTABLE=%{_bindir}/sphinx-build-3 \ - -DLLVM_INCLUDE_BENCHMARKS=OFF \ - 
-DLLVM_UNITTEST_LINK_FLAGS="-Wl,-plugin-opt=O0" - -# Build libLLVM.so first to help reduce OOM errors during concurrent building. +%global reduce_debuginfo 1 + +%if %reduce_debuginfo == 1 +# Decrease debuginfo verbosity to reduce memory consumption during final library linking +%global optflags %(echo %{optflags} | sed 's/-g /-g1 /') +%endif + +%global projects clang;clang-tools-extra;lld +%global runtimes compiler-rt;openmp + +%if %{with lldb} +%global projects %{projects};lldb +%endif + +%if %{with mlir} +%global projects %{projects};mlir +%endif + +%if %{with flang} +%global projects %{projects};flang +%endif + +%if %{with build_bolt} +%global projects %{projects};bolt +%endif + +%if %{with polly} +%global projects %{projects};polly +%endif + +%if %{with libcxx} +%global runtimes %{runtimes};libcxx;libcxxabi;libunwind +%endif + +%global cfg_file_content --gcc-triple=%{_target_cpu}-linux-gnu + +%global cfg_file_content %{cfg_file_content} -gdwarf-4 -g0 + +# Copy CFLAGS into ASMFLAGS, so -fcf-protection is used when compiling assembly files. +export ASMFLAGS="%{build_cflags}" + +# Disable dwz on aarch64, because it takes a huge amount of time to decide not to optimize things. +# This is copied from clang. +%ifarch aarch64 +%define _find_debuginfo_dwz_opts %{nil} +%endif + +cd llvm + +# Remember old values to reset to +OLD_PATH="$PATH" +OLD_LD_LIBRARY_PATH="$LD_LIBRARY_PATH" +OLD_CWD="$PWD" + +%global builddir_instrumented $RPM_BUILD_DIR/instrumented-llvm + +##region LLVM lit +%if %{with python_lit} +pushd utils/lit +%py3_build +popd +%endif +##endregion LLVM lit + + +##region cmake options + +# Any ABI-affecting flags should be in here. +%global cmake_common_args \\\ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \\\ + -DLLVM_ENABLE_EH=ON \\\ + -DLLVM_ENABLE_RTTI=ON \\\ + -DLLVM_USE_PERF=ON \\\ + -DLLVM_TARGETS_TO_BUILD=%{targets_to_build} \\\ + -DBUILD_SHARED_LIBS=OFF \\\ + -DLLVM_BUILD_LLVM_DYLIB=ON \\\ + -DLLVM_LINK_LLVM_DYLIB=ON \\\ + -DCLANG_LINK_CLANG_DYLIB=ON \\\ + -DLLVM_ENABLE_FFI:BOOL=ON \\\ + -DCMAKE_C_COMPILER_LAUNCHER=ccache \\\ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + +%global cmake_config_args %{cmake_common_args} + +##region clang options +%global cmake_config_args %{cmake_config_args} \\\ + -DCLANG_BUILD_EXAMPLES:BOOL=OFF \\\ + -DCLANG_CONFIG_FILE_SYSTEM_DIR=%{_sysconfdir}/clang/ \\\ + -DCLANG_DEFAULT_PIE_ON_LINUX=OFF \\\ + -DCLANG_DEFAULT_UNWINDLIB=libgcc \\\ + -DCLANG_ENABLE_ARCMT:BOOL=ON \\\ + -DCLANG_ENABLE_STATIC_ANALYZER:BOOL=ON \\\ + -DCLANG_INCLUDE_DOCS:BOOL=ON \\\ + -DCLANG_INCLUDE_TESTS:BOOL=ON \\\ + -DCLANG_PLUGIN_SUPPORT:BOOL=ON \\\ + -DCLANG_REPOSITORY_STRING="%{?dist_vendor} %{version}-%{release}" \\\ + -DLLVM_EXTERNAL_CLANG_TOOLS_EXTRA_SOURCE_DIR=../clang-tools-extra \\\ + -DCLANG_RESOURCE_DIR=../lib/clang/%{maj_ver} +##endregion clang options + +##region compiler-rt options +%global cmake_config_args %{cmake_config_args} \\\ + -DCOMPILER_RT_INCLUDE_TESTS:BOOL=OFF \\\ + -DCOMPILER_RT_INSTALL_PATH=%{_libdir}/clang/%{maj_ver} +##endregion compiler-rt options + +##region docs options + +# Add all *enabled* documentation targets (no doxygen but sphinx) +%global cmake_config_args %{cmake_config_args} \\\ + -DLLVM_ENABLE_DOXYGEN:BOOL=OFF \\\ + -DLLVM_ENABLE_SPHINX:BOOL=ON \\\ + -DLLVM_BUILD_DOCS:BOOL=ON + +# Configure sphinx: +# Build man-pages but no HTML docs using sphinx +%global cmake_config_args %{cmake_config_args} \\\ + -DSPHINX_EXECUTABLE=/usr/bin/sphinx-build-3 \\\ + -DSPHINX_OUTPUT_HTML:BOOL=OFF \\\ + -DSPHINX_OUTPUT_MAN:BOOL=ON \\\ + 
-DSPHINX_WARNINGS_AS_ERRORS=OFF +##endregion docs options + +##region lldb options +%if %{with lldb} +%ifarch ppc64le + %global cmake_config_args %{cmake_config_args} -DLLDB_TEST_USER_ARGS=--skip-category=watchpoint +%endif + +%global cmake_config_args %{cmake_config_args} -DLLDB_ENFORCE_STRICT_TEST_REQUIREMENTS:BOOL=ON + +%endif +##endregion lldb options + +##region lld options +%global cmake_config_args %{cmake_config_args} \\\ + -DLLVM_DYLIB_COMPONENTS="all" \\\ + -DLLD_INCLUDE_TESTS=ON \\\ + -DLLD_TEST_DEPENDS=ON +##endregion lld options + +##region libcxx options +%if %{with libcxx} +%global cmake_config_args %{cmake_config_args} \\\ + -DLIBCXX_ENABLE_LTO=OFF \\\ + -DLIBCXXABI_ENABLE_LTO=OFF \\\ + -DCMAKE_POSITION_INDEPENDENT_CODE=ON \\\ + -DLIBCXX_INCLUDE_BENCHMARKS=OFF \\\ + -DLIBCXX_STATICALLY_LINK_ABI_IN_STATIC_LIBRARY=ON \\\ + -DLIBCXX_ENABLE_ABI_LINKER_SCRIPT=ON \\\ + -DLIBCXXABI_USE_LLVM_UNWINDER=OFF \\\ + -DLIBUNWIND_INSTALL_INCLUDE_DIR=%{_includedir}/llvm-libunwind + +# If we don't set the .._INSTALL_LIBRARY_DIR variables, +# the *.so files will be placed in a subdirectory that includes the triple +%global cmake_config_args %{cmake_config_args} \\\ + -DLIBCXX_INSTALL_LIBRARY_DIR=%{_libdir} \\\ + -DLIBCXXABI_INSTALL_LIBRARY_DIR=%{_libdir} \\\ + -DLIBUNWIND_INSTALL_LIBRARY_DIR=%{_libdir} + +%global cmake_config_args %{cmake_config_args} \\\ + -DLIBCXX_INSTALL_INCLUDE_TARGET_DIR=%{_includedir}/c++/v1 \\\ + -DLIBCXX_INSTALL_INCLUDE_DIR=%{_includedir}/c++/v1 \\\ + -DLIBCXX_INSTALL_MODULES_DIR=%{_datadir}/libc++/v1 \\\ + -DLIBCXXABI_INSTALL_INCLUDE_DIR=%{_includedir}/c++/v1 + +%endif +##endregion libcxx options + +##region llvm options +%global cmake_config_args %{cmake_config_args} \\\ + -DLLVM_APPEND_VC_REV:BOOL=OFF \\\ + -DLLVM_BUILD_EXAMPLES:BOOL=OFF \\\ + -DLLVM_BUILD_EXTERNAL_COMPILER_RT:BOOL=ON \\\ + -DLLVM_BUILD_RUNTIME:BOOL=ON \\\ + -DLLVM_BUILD_TOOLS:BOOL=ON \\\ + -DLLVM_BUILD_TABLEGEN_COMMON=ON \\\ + -DLLVM_BUILD_UTILS:BOOL=ON \\\ + -DLLVM_DEFAULT_TARGET_TRIPLE=%{llvm_triple} \\\ + -DLLVM_ENABLE_LIBCXX:BOOL=OFF \\\ + -DLLVM_ENABLE_PER_TARGET_RUNTIME_DIR=ON \\\ + -DLLVM_ENABLE_PROJECTS="%{projects}" \\\ + -DLLVM_ENABLE_RUNTIMES="%{runtimes}" \\\ + -DLLVM_ENABLE_ZLIB:BOOL=FORCE_ON \\\ + -DLLVM_ENABLE_ZSTD:BOOL=FORCE_ON \\\ + -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=%{experimental_targets_to_build} \\\ + -DLLVM_INCLUDE_BENCHMARKS=OFF \\\ + -DLLVM_INCLUDE_EXAMPLES:BOOL=OFF \\\ + -DLLVM_INCLUDE_TOOLS:BOOL=ON \\\ + -DLLVM_INCLUDE_UTILS:BOOL=ON \\\ + -DLLVM_INSTALL_TOOLCHAIN_ONLY:BOOL=OFF \\\ + -DLLVM_INSTALL_UTILS:BOOL=ON \\\ + -DLLVM_PARALLEL_LINK_JOBS=1 \\\ + -DLLVM_TOOLS_INSTALL_DIR:PATH=bin \\\ + -DLLVM_UNREACHABLE_OPTIMIZE:BOOL=OFF \\\ + -DLLVM_UTILS_INSTALL_DIR:PATH=bin +##endregion llvm options + +##region mlir options +%if %{with mlir} +%global cmake_config_args %{cmake_config_args} \\\ + -DMLIR_INCLUDE_DOCS:BOOL=ON \\\ + -DMLIR_INCLUDE_TESTS:BOOL=ON \\\ + -DMLIR_INCLUDE_INTEGRATION_TESTS:BOOL=OFF \\\ + -DMLIR_INSTALL_AGGREGATE_OBJECTS=OFF \\\ + -DMLIR_BUILD_MLIR_C_DYLIB=ON \\\ + -DMLIR_ENABLE_BINDINGS_PYTHON:BOOL=ON +%endif + +##region flang options +##endregion mlir options +%if %{with flang} +%global cmake_config_args %{cmake_config_args} \\\ + -DFLANG_INCLUDE_DOCS=ON +%endif +##endregion flang options + +##region openmp options +%global cmake_config_args %{cmake_config_args} \\\ + -DOPENMP_INSTALL_LIBDIR=%{_libdir} \\\ + -DLIBOMP_INSTALL_ALIASES=OFF \\\ + -DOPENMP_LIBDIR_SUFFIX=64 \\\ + -DOPENMP_INSTALL_INCLUDE_DIR=%{_libdir}/clang/%{maj_ver}/include +##endregion 
openmp options + +##region bolt options +%if %{with build_bolt} +%global cmake_config_args %{cmake_config_args} \\\ + -DBOLT_BUILD_TOOLS=ON +%endif +##endregion bolt options + +##region polly options +%if %{with polly} +%global cmake_config_args %{cmake_config_args} \\\ + -DLLVM_POLLY_LINK_INTO_TOOLS=OFF +%endif +##endregion polly options + +##region test options +%global cmake_config_args %{cmake_config_args} \\\ + -DLLVM_BUILD_TESTS:BOOL=ON \\\ + -DLLVM_INCLUDE_TESTS:BOOL=ON \\\ + -DLLVM_INSTALL_GTEST:BOOL=ON \\\ + -DLLVM_LIT_ARGS="-vv" \\\ + -DLLVM_INCLUDE_GOOGLETEST=ON \\\ + -DLLVM_EXTERNAL_GTEST_SOURCE_DIR=OFF + +%if %{with lto_build} + %global cmake_config_args %{cmake_config_args} -DLLVM_UNITTEST_LINK_FLAGS="-Wl,-plugin-opt=O0" +%endif +##endregion test options + +##region misc options +%global cmake_config_args %{cmake_config_args} \\\ + -DCMAKE_INSTALL_PREFIX=%{install_prefix} \\\ + -DENABLE_LINKER_BUILD_ID:BOOL=ON \\\ + -DPython3_EXECUTABLE=%{__python3} + +%global cmake_config_args %{cmake_config_args} -DCMAKE_SKIP_INSTALL_RPATH:BOOL=ON + +%global cmake_config_args %{cmake_config_args} -DPPC_LINUX_DEFAULT_IEEELONGDOUBLE=ON + +%if %reduce_debuginfo == 1 + %global cmake_config_args %{cmake_config_args} -DCMAKE_C_FLAGS_RELWITHDEBINFO="%{optflags} -DNDEBUG" + %global cmake_config_args %{cmake_config_args} -DCMAKE_CXX_FLAGS_RELWITHDEBINFO="%{optflags} -DNDEBUG" +%endif + +%if 0%{?__isa_bits} == 64 + %global cmake_config_args %{cmake_config_args} -DLLVM_LIBDIR_SUFFIX=64 +%endif + +%if %{with gold} + %global cmake_config_args %{cmake_config_args} -DLLVM_BINUTILS_INCDIR=%{_includedir} +%endif + +%global cmake_config_args %{cmake_config_args} -DLLVM_VERSION_SUFFIX='' + +%ifarch x86_64 + %global cmake_config_args %{cmake_config_args} -DCMAKE_SHARED_LINKER_FLAGS="$LDFLAGS -Wl,-z,cet-report=error" +%endif + +##endregion misc options + +extra_cmake_args='' +# TSan does not support 5-level page tables (https://github.com/llvm/llvm-project/issues/111492) +# so do not run tests using tsan on systems that potentially use 5-level page tables. +if grep 'flags.*la57' /proc/cpuinfo; then + extra_cmake_args="$extra_cmake_args -DOPENMP_TEST_ENABLE_TSAN=OFF" +fi +##endregion cmake options + + +##region Final stage + +##region reset paths and globals +function reset_paths { + export PATH="$OLD_PATH" + export LD_LIBRARY_PATH="$OLD_LD_LIBRARY_PATH" +} +reset_paths + +cd $OLD_CWD +%global _vpath_srcdir . +%global __cmake_builddir %{_vpath_builddir} +##endregion reset paths and globals + +%global extra_cmake_opts %{nil} + +%cmake -G Ninja %{cmake_config_args} %{extra_cmake_opts} $extra_cmake_args + +# Build libLLVM.so first. This will help reduce OOM. %cmake_build --target LLVM + +# Also build libclang-cpp.so separately to avoid OOM errors. +%cmake_build --target libclang-cpp.so + +# Same for the three large MLIR dylibs. 
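+# Linking these large dylibs is what tends to exhaust memory, so build them
+# one at a time before kicking off the fully parallel build further down.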
+%if %{with mlir} +%cmake_build --target libMLIR.so +%cmake_build --target libMLIR-C.so +%cmake_build --target libMLIRPythonCAPI.so +%endif + +# flang, same for avoid OOM +%if %{with flang} +%cmake_build --target flang-new +%endif + %cmake_build +%cmake_build --target runtimes +##endregion Final stage + + + %install +##region LLVM installation +pushd llvm + +%if %{with python_lit} +pushd utils/lit +%py3_install + +# Strip out #!/usr/bin/env python +sed -i -e '1{\@^#!/usr/bin/env python@d}' %{buildroot}%{python3_sitelib}/lit/*.py +popd +%endif + %cmake_install -rm -rf %{buildroot}/%{_pkgdocdir}/html -# for lit tests +popd + mkdir -p %{buildroot}/%{_bindir} -install -m 0755 %{_vpath_builddir}/bin/llvm-isel-fuzzer %{buildroot}%{_bindir} -install -m 0755 %{_vpath_builddir}/bin/llvm-opt-fuzzer %{buildroot}%{_bindir} -rm -rf test/tools/UpdateTestChecks -install %{_vpath_builddir}/lib64/libLLVMTestingSupport.a %{buildroot}%{_libdir} -install %{_vpath_builddir}/lib64/libLLVMTestingAnnotations.a %{buildroot}%{_libdir} +for f in llvm-isel-fuzzer llvm-opt-fuzzer +do + install -m 0755 llvm/%{_vpath_builddir}/bin/$f %{buildroot}%{install_bindir} + chrpath --delete %{buildroot}%{install_bindir}/$f +done + +pushd %{buildroot}/%{_bindir} +ln -s llvm-config llvm-config-%{maj_ver} +popd + +# Install libraries needed for unittests +install %{build_libdir}/libLLVMTestingSupport.a %{buildroot}%{install_libdir} +install %{build_libdir}/libLLVMTestingAnnotations.a %{buildroot}%{install_libdir} -# Add symlink to lto plugin in the binutils plugin directory -mkdir -p %{buildroot}%{_libdir}/bfd-plugins/ +%if %{with gold} +# Add symlink to lto plugin in the binutils plugin directory. +%{__mkdir_p} %{buildroot}%{_libdir}/bfd-plugins/ ln -s -t %{buildroot}%{_libdir}/bfd-plugins/ ../LLVMgold.so +%endif + +mkdir -p %{buildroot}%{install_datadir}/llvm/cmake +cp -Rv cmake/* %{buildroot}%{install_datadir}/llvm/cmake +# endregion LLVM installation + + +# region CLANG installation +pushd %{buildroot}%{_bindir} +ln -s clang++ clang++-%{maj_ver} +popd + +# Add a symlink in bindir to clang-format-diff +ln -s ../share/clang/clang-format-diff.py %{buildroot}%{install_bindir}/clang-format-diff + +# install clang python bindings +mkdir -p %{buildroot}%{python3_sitelib}/clang/ +install -p -m644 clang/bindings/python/clang/* %{buildroot}%{python3_sitelib}/clang/ +%py_byte_compile %{__python3} %{buildroot}%{python3_sitelib}/clang + +# install scanbuild-py to python sitelib. +if [ -d "%{buildroot}%{install_prefix}/lib64/libear" ] ; then + mv "%{buildroot}%{install_prefix}/lib64/libear" %{buildroot}%{python3_sitelib} +else + mv "%{buildroot}%{install_prefix}/lib/libear" %{buildroot}%{python3_sitelib} +fi +if [ -d "%{buildroot}%{install_prefix}/lib64/libscanbuild" ] ; then + mv "%{buildroot}%{install_prefix}/lib64/libscanbuild" %{buildroot}%{python3_sitelib} +else + mv "%{buildroot}%{install_prefix}/lib/libscanbuild" %{buildroot}%{python3_sitelib} +fi +# Cannot use {libear,libscanbuild} style expansion in py_byte_compile. 
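+# So the two trees are byte-compiled with separate calls: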
+%py_byte_compile %{__python3} %{buildroot}%{python3_sitelib}/libear +%py_byte_compile %{__python3} %{buildroot}%{python3_sitelib}/libscanbuild + +# Move emacs integration files to the correct directory +mkdir -p %{buildroot}%{_emacs_sitestartdir} +for f in clang-format.el clang-include-fixer.el clang-rename.el; do +mv %{buildroot}{%{install_datadir}/clang,%{_emacs_sitestartdir}}/$f +done + +# Create manpage symlink for clang++ +ln -s clang-%{maj_ver}.1 %{buildroot}%{install_mandir}/man1/clang++.1 + +# Fix permissions of scan-view scripts +chmod a+x %{buildroot}%{install_datadir}/scan-view/{Reporter.py,startfile.py} + +# remove editor integrations (bbedit, sublime, emacs, vim) +rm -vf %{buildroot}%{install_datadir}/clang/clang-format-bbedit.applescript +rm -vf %{buildroot}%{install_datadir}/clang/clang-format-sublime.py* + +# Remove unpackaged files +rm -Rvf %{buildroot}%{install_datadir}/clang-doc +rm -vf %{buildroot}%{install_datadir}/clang/bash-autocomplete.sh + +# Move clang resource directory to default prefix. +mkdir -p %{buildroot}%{_libdir}/clang +mv %{buildroot}%{install_prefix}/lib/clang/%{maj_ver} %{buildroot}%{_libdir}/clang/%{maj_ver} +# Create any missing sub-directories in the clang resource directory. +mkdir -p %{buildroot}%{_libdir}/clang/%{maj_ver}/{bin,include,lib,share}/ +##endregion CLANG installation + + +##region COMPILER-RT installation +# Triple where compiler-rt libs are installed. If it differs from llvm_triple, then there is +# also a symlink llvm_triple -> compiler_rt_triple. +%global compiler_rt_triple %{llvm_triple} + +%ifarch ppc64le +# Fix install path on ppc64le so that the directory name matches the triple used +# by clang. +mv %{buildroot}%{_libdir}/clang/%{maj_ver}/lib/powerpc64le-linux-gnu %{buildroot}%{_libdir}/clang/%{maj_ver}/lib/%{llvm_triple} +%endif + +for d in %{buildroot}%{install_prefix}/%{_lib}/clang/%{maj_ver}/include/*/; do + test -d "$d" || continue + mv "$d" %{buildroot}%{_includedir}/ +done + +%ifnarch loongarch64 +mv %{buildroot}%{install_prefix}/%{_lib}/clang/%{maj_ver}/bin/* %{buildroot}%{_bindir}/ +%endif + +# create links at the same path of clang libs +mv %{buildroot}%{install_libdir}/clang/%{maj_ver}/lib/%{llvm_triple}/* %{buildroot}%{_libdir}/clang/%{maj_ver}/lib/%{llvm_triple}/ + +pushd %{buildroot}%{_libdir}/clang/%{maj_ver}/lib/ +ln -s %{llvm_triple} linux +popd + +pushd %{buildroot}%{_libdir}/clang/%{maj_ver}/lib/linux/ +for f in *.syms *.a *.so +do + base=${f%%.*} + rest=${f#*.} + ln -s -- "$f" "${base}-%{_target_cpu}.${rest}" +done +popd + +pushd %{buildroot}%{_libdir}/clang/%{maj_ver}/lib/linux/ +for i in *.a *.so +do + echo $i + ln -s linux/$i ../$i +done +popd +##endregion COMPILER-RT installation + + +##region OPENMP installation +rm -rf %{buildroot}%{install_libdir}/libarcher_static.a +rm -rf %{buildroot}/%{install_datadir}/gdb +##endregion OPENMP installation + + +##region LLD installation +rm -rf lld/test/%{_arch}.site.cfg.py +head -n -1 llvm/%{__cmake_builddir}/tools/lld/test/lit.site.cfg.py >> lld/test/%{_arch}.site.cfg.py +# Patch lit config files to load custom config: +for f in lld/test/%{_arch}.site.cfg.py ; do + echo "lit_config.load_config(config, '%{_datadir}/lld/lit.lld-test.cfg.py')" >> $f +done + +# Install test files +install -d %{buildroot}%{_datadir}/lld/src +cp %{SOURCE1332} %{buildroot}%{_datadir}/lld/ + +mv lld/test/%{_arch}.site.cfg.py %{buildroot}%{_datadir}/lld/src/%{_arch}.site.cfg.py +tar --sort=name --mtime='UTC 2020-01-01' -c lld/test/ | gzip -n > %{buildroot}%{_datadir}/lld/src/test.tar.gz + 
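+# At test time the run-lit-tests helper shipped with this spec unpacks
+# test.tar.gz (kept bit-for-bit reproducible above via --sort=name and the
+# fixed --mtime) into a temporary directory and drives lit against it, e.g.:
+#   /usr/libexec/tests/lld/run-lit-tests --threads 4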
+install -d %{buildroot}%{_libexecdir}/tests/lld +install -m 0755 %{SOURCE1331} %{buildroot}%{_libexecdir}/tests/lld +install -d %{buildroot}%{_libdir}/lld/ + +touch %{buildroot}%{_bindir}/ld + +install -D -m 644 -t %{buildroot}%{install_mandir}/man1/ lld/docs/ld.lld.1 +##endregion LLD installation + +##region LLDB installation +%if %{with lldb} +# Move python package out of llvm prefix. +mkdir -p %{buildroot}%{python3_sitearch} +mv %{buildroot}%{install_prefix}/%{_lib}/python%{python3_version}/site-packages/lldb %{buildroot}/%{python3_sitearch} +rmdir %{buildroot}%{install_prefix}/%{_lib}/python%{python3_version}/site-packages +rmdir %{buildroot}%{install_prefix}/%{_lib}/python%{python3_version} + +# python: fix binary libraries location +liblldb=$(basename $(readlink -e %{buildroot}%{install_libdir}/liblldb.so)) +ln -vsf "../../../${liblldb}" %{buildroot}%{python3_sitearch}/lldb/_lldb.so +%py_byte_compile %{__python3} %{buildroot}%{python3_sitearch}/lldb +%endif +##endregion LLDB installation + +##region mlir installation +%if %{with mlir} +mkdir -p %{buildroot}/%{python3_sitearch} +mv %{buildroot}%{install_prefix}/python_packages/mlir_core/mlir %{buildroot}/%{python3_sitearch} +# These directories should be empty now. +rmdir %{buildroot}%{install_prefix}/python_packages/mlir_core %{buildroot}%{install_prefix}/python_packages +# Unneeded files. +rm -rf %{buildroot}%{install_prefix}/src/python +%endif +##endregion mlir installation + +##region flang installation +%if %{with flang} +rm %{buildroot}/%{_libdir}/libFIRAnalysis.so \ + %{buildroot}/%{_libdir}/libFIRBuilder.so \ + %{buildroot}/%{_libdir}/libFIRCodeGen.so \ + %{buildroot}/%{_libdir}/libFIRDialect.so \ + %{buildroot}/%{_libdir}/libFIRDialectSupport.so \ + %{buildroot}/%{_libdir}/libFIROpenACCSupport.so \ + %{buildroot}/%{_libdir}/libFIRSupport.so \ + %{buildroot}/%{_libdir}/libFIRTestAnalysis.so \ + %{buildroot}/%{_libdir}/libFIRTestOpenACCInterfaces.so \ + %{buildroot}/%{_libdir}/libFIRTransforms.so \ + %{buildroot}/%{_libdir}/libflangFrontend.so \ + %{buildroot}/%{_libdir}/libflangFrontendTool.so \ + %{buildroot}/%{_libdir}/libflangPasses.so \ + %{buildroot}/%{_libdir}/libFlangOpenMPTransforms.so \ + %{buildroot}/%{_libdir}/libFortranCommon.so \ + %{buildroot}/%{_libdir}/libFortranEvaluate.so \ + %{buildroot}/%{_libdir}/libFortranLower.so \ + %{buildroot}/%{_libdir}/libFortranParser.so \ + %{buildroot}/%{_libdir}/libFortranSemantics.so \ + %{buildroot}/%{_libdir}/libFortranSupport.so \ + %{buildroot}/%{_libdir}/libHLFIRDialect.so \ + %{buildroot}/%{_libdir}/libHLFIRTransforms.so +find %{buildroot}/%{_includedir}/flang -type f -a ! -iname '*.mod' -delete + +# this is a test binary +rm %{buildroot}%{_bindir}/f18-parse-demo +%endif +##endregion flang installation + +##region libcxx installation +%if %{with libcxx} +# We can't install the unversionned path on default location because that would conflict with libunwind +# +# The versionned path has a different soname (libunwind.so.1 compared to +# libunwind.so.8) so they can live together in %%{_libdir} +# +# ABI wise, even though llvm-libunwind's library is named libunwind, it doesn't +# have the exact same ABI as gcc's libunwind (it actually provides a subset). 
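+# So keep only the versioned libunwind.so.1* in the default libdir and
+# recreate the unversioned development symlink under llvm-unwind/ instead: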
+rm %{buildroot}%{_libdir}/libunwind.so +mkdir -p %{buildroot}/%{_libdir}/llvm-unwind/ + +pushd %{buildroot}/%{_libdir}/llvm-unwind +ln -s ../libunwind.so.1.0 libunwind.so +popd +%endif +##endregion libcxx installation + +##region BOLT installation +# We don't ship libLLVMBOLT*.a +rm -f %{buildroot}%{install_libdir}/libLLVMBOLT*.a +##endregion BOLT installation + +# Move files from src to dest and replace the old files in src with relative +# symlinks. +move_and_replace_with_symlinks() { + local src="$1" + local dest="$2" + mkdir -p "$dest" + + # Change to source directory to simplify relative paths + (cd "$src" && \ + find * -type d -exec mkdir -p "$dest/{}" \; && \ + find * \( -type f -o -type l \) -exec mv "$src/{}" "$dest/{}" \; \ + -exec ln -s --relative "$dest/{}" "$src/{}" \;) +} + +# Move files from the llvm prefix to the system prefix +mkdir -p %{buildroot}%{_bindir} +mkdir -p %{buildroot}%{_libdir} +mkdir -p %{buildroot}%{_libexecdir} +mkdir -p %{buildroot}%{_includedir} +mkdir -p %{buildroot}%{_datadir} +cp -rf %{buildroot}%{install_bindir}/* %{buildroot}%{_bindir}/ +cp -rf %{buildroot}%{install_libdir}/* %{buildroot}%{_libdir}/ +cp -rf %{buildroot}%{install_libexecdir}/* %{buildroot}%{_libexecdir}/ +cp -rf %{buildroot}%{install_includedir}/* %{buildroot}%{_includedir}/ +cp -rf %{buildroot}%{install_datadir}/* %{buildroot}%{_datadir}/ +rm -rf %{buildroot}%{install_mandir} +rm -rf %{buildroot}%{_includedir}/clang-tidy +rm -rf %{buildroot}%{install_includedir} +rm -rf %{buildroot}%{install_libdir}/* +rm -rf %{buildroot}%{install_datadir}/ +rm -rf %{buildroot}%{install_libexecdir}/ +rm -rf %{buildroot}%{install_bindir} + +# remove clang-docs +rm -f %{buildroot}%{_datadir}/clang/*.css +rm -f %{buildroot}%{_datadir}/clang/*.js -(cd %{buildroot}/%{_bindir} ; ln -s llvm-config llvm-config-%{maj_ver}) +# clean openmp +rm -f %{buildroot}%{_bindir}/llvm-omp* +rm -f %{buildroot}%{_libdir}/*.bc +##endregion clean + +# clean bolt +rm -f %{buildroot}%{_libdir}/libbolt_rt_instr_osx.a + +# clean redundant manpages +rm -f %{buildroot}%{_mandir}/man1/*-%{maj_ver}.1 +rm -f %{buildroot}%{_mandir}/man1/extraclangtools.1 -cp -Rv ../cmake/Modules/* %{buildroot}%{_libdir}/cmake/llvm -mkdir -p %{buildroot}%{_datadir}/llvm/cmake -cp -Rv ../cmake/* %{buildroot}%{_datadir}/llvm/cmake %check +cd llvm + +%if %{with check} +%cmake_test || true +%endif + +# Do this here instead of in install so the check targets are also included. 
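+# (.ninja_log records per-target build times, which helps when chasing the
+# memory- and link-time-heavy targets called out above.)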
+cp %{_vpath_builddir}/.ninja_log %{buildroot}%{_datadir} + + -LD_LIBRARY_PATH=%{buildroot}/%{_libdir} %{__ninja} check-all -C %{_vpath_builddir} +%post -n lld +update-alternatives --install %{_bindir}/ld ld %{_bindir}/ld.lld 1 +%postun -n lld +if [ $1 -eq 0 ] ; then + update-alternatives --remove ld %{_bindir}/ld.lld +fi -%files + + +##region files +##region LLVM lit files +%if %{with python_lit} +%files -n python3-lit +%license llvm/utils/lit/LICENSE.TXT +%doc llvm/utils/lit/README.rst +%{_bindir}/lit +%{python3_sitelib}/lit/ +%{python3_sitelib}/lit-*-info/ +%endif +##endregion LLVM lit files + + +##region LLVM files +%files -n llvm %license LICENSE.TXT -%{_bindir}/* -%exclude %{_bindir}/llvm-config -%exclude %{_bindir}/llvm-config-%{maj_ver} -%exclude %{_bindir}/not -%exclude %{_bindir}/count -%exclude %{_bindir}/yaml-bench -%exclude %{_bindir}/lli-child-target -%exclude %{_bindir}/llvm-isel-fuzzer -%exclude %{_bindir}/llvm-opt-fuzzer -%{_datadir}/opt-viewer -%{_mandir}/man1/* -%exclude %{_mandir}/man1/llvm-config* +%{_bindir}/bugpoint +%{_bindir}/dsymutil +%{_bindir}/FileCheck +%{_bindir}/llc +%{_bindir}/lli +%{_bindir}/llvm-addr2line +%{_bindir}/llvm-ar +%{_bindir}/llvm-as +%{_bindir}/llvm-bcanalyzer +%{_bindir}/llvm-bitcode-strip +%{_bindir}/llvm-c-test +%{_bindir}/llvm-cat +%{_bindir}/llvm-cfi-verify +%{_bindir}/llvm-cov +%{_bindir}/llvm-cvtres +%{_bindir}/llvm-cxxdump +%{_bindir}/llvm-cxxfilt +%{_bindir}/llvm-cxxmap +%{_bindir}/llvm-debuginfo-analyzer +%{_bindir}/llvm-debuginfod +%{_bindir}/llvm-debuginfod-find +%{_bindir}/llvm-diff +%{_bindir}/llvm-dis +%{_bindir}/llvm-dlltool +%{_bindir}/llvm-dwarfdump +%{_bindir}/llvm-dwarfutil +%{_bindir}/llvm-dwp +%{_bindir}/llvm-exegesis +%{_bindir}/llvm-extract +%{_bindir}/llvm-gsymutil +%{_bindir}/llvm-ifs +%{_bindir}/llvm-install-name-tool +%{_bindir}/llvm-jitlink +%{_bindir}/llvm-jitlink-executor +%{_bindir}/llvm-lib +%{_bindir}/llvm-libtool-darwin +%{_bindir}/llvm-link +%{_bindir}/llvm-lipo +%{_bindir}/llvm-lto +%{_bindir}/llvm-lto2 +%{_bindir}/llvm-mc +%{_bindir}/llvm-mca +%{_bindir}/llvm-ml +%{_bindir}/llvm-modextract +%{_bindir}/llvm-mt +%{_bindir}/llvm-nm +%{_bindir}/llvm-objcopy +%{_bindir}/llvm-objdump +%{_bindir}/llvm-opt-report +%{_bindir}/llvm-otool +%{_bindir}/llvm-pdbutil +%{_bindir}/llvm-PerfectShuffle +%{_bindir}/llvm-profdata +%{_bindir}/llvm-profgen +%{_bindir}/llvm-ranlib +%{_bindir}/llvm-rc +%{_bindir}/llvm-readelf +%{_bindir}/llvm-readobj +%{_bindir}/llvm-reduce +%{_bindir}/llvm-remark-size-diff +%{_bindir}/llvm-remarkutil +%{_bindir}/llvm-rtdyld +%{_bindir}/llvm-sim +%{_bindir}/llvm-size +%{_bindir}/llvm-split +%{_bindir}/llvm-stress +%{_bindir}/llvm-strings +%{_bindir}/llvm-strip +%{_bindir}/llvm-symbolizer +%{_bindir}/llvm-tapi-diff +%{_bindir}/llvm-tblgen +%{_bindir}/llvm-tli-checker +%{_bindir}/llvm-undname +%{_bindir}/llvm-windres +%{_bindir}/llvm-xray +%{_bindir}/obj2yaml +%{_bindir}/opt +%{_bindir}/sancov +%{_bindir}/sanstats +%{_bindir}/split-file +%{_bindir}/UnicodeNameMappingGenerator +%{_bindir}/verify-uselistorder +%{_bindir}/yaml2obj +%{_mandir}/man1/bugpoint* +%{_mandir}/man1/clang-tblgen* +%{_mandir}/man1/dsymutil* +%{_mandir}/man1/FileCheck* +%{_mandir}/man1/lit* +%{_mandir}/man1/llc* +%{_mandir}/man1/lldb-tblgen* +%{_mandir}/man1/lli* +%{_mandir}/man1/llvm-addr2line* +%{_mandir}/man1/llvm-ar* +%{_mandir}/man1/llvm-as* +%{_mandir}/man1/llvm-bcanalyzer* +%{_mandir}/man1/llvm-cov* +%{_mandir}/man1/llvm-cxxfilt* +%{_mandir}/man1/llvm-cxxmap* +%{_mandir}/man1/llvm-debuginfo-analyzer* 
+%{_mandir}/man1/llvm-diff* +%{_mandir}/man1/llvm-dis* +%{_mandir}/man1/llvm-dwarfdump* +%{_mandir}/man1/llvm-dwarfutil* +%{_mandir}/man1/llvm-exegesis* +%{_mandir}/man1/llvm-extract* +%{_mandir}/man1/llvm-ifs* +%{_mandir}/man1/llvm-install-name-tool* +%{_mandir}/man1/llvm-lib.1 +%{_mandir}/man1/llvm-libtool-darwin.1 +%{_mandir}/man1/llvm-link* +%{_mandir}/man1/llvm-lipo* +%{_mandir}/man1/llvm-locstats* +%{_mandir}/man1/llvm-mc.1 +%{_mandir}/man1/llvm-mca.1 +%{_mandir}/man1/llvm-nm* +%{_mandir}/man1/llvm-objcopy* +%{_mandir}/man1/llvm-objdump* +%{_mandir}/man1/llvm-opt-report* +%{_mandir}/man1/llvm-otool* +%{_mandir}/man1/llvm-pdbutil* +%{_mandir}/man1/llvm-profdata* +%{_mandir}/man1/llvm-profgen* +%{_mandir}/man1/llvm-ranlib* +%{_mandir}/man1/llvm-readelf* +%{_mandir}/man1/llvm-readobj* +%{_mandir}/man1/llvm-reduce* +%{_mandir}/man1/llvm-remarkutil* +%{_mandir}/man1/llvm-remark-size-diff* +%{_mandir}/man1/llvm-size* +%{_mandir}/man1/llvm-stress* +%{_mandir}/man1/llvm-strings* +%{_mandir}/man1/llvm-strip* +%{_mandir}/man1/llvm-symbolizer* +%{_mandir}/man1/llvm-tblgen* +%{_mandir}/man1/llvm-tli-checker* +%{_mandir}/man1/mlir-tblgen* +%{_mandir}/man1/opt* +%{_mandir}/man1/tblgen* +%{_datadir}/opt-viewer/ -%files libs +%files -n llvm-libs %license LICENSE.TXT %{_libdir}/libLLVM-%{maj_ver}.so %{_libdir}/libLLVM-%{maj_ver}.%{min_ver}*.so -%{_libdir}/LLVMgold.so -%{_libdir}/bfd-plugins/LLVMgold.so %{_libdir}/libLTO.so* %{_libdir}/libRemarks.so* +%if %{with gold} +%{_libdir}/LLVMgold.so +%{_libdir}/bfd-plugins/LLVMgold.so +%endif -%files devel +%files -n llvm-devel %license LICENSE.TXT %{_bindir}/llvm-config %{_bindir}/llvm-config-%{maj_ver} %{_libdir}/libLLVM.so -%{_libdir}/cmake/llvm -%{_libdir}/*.a -%exclude %{_libdir}/libLLVMTestingSupport.a -%{_includedir}/llvm -%{_includedir}/llvm-c +%{_libdir}/cmake/llvm/ %{_mandir}/man1/llvm-config* +%{_includedir}/llvm/ +%{_includedir}/llvm-c/ + +%files -n llvm-static +%license LICENSE.TXT +%{_libdir}/libLLVM*.a +%exclude %{_libdir}/libLLVMTestingSupport.a +%exclude %{_libdir}/libLLVMTestingAnnotations.a -%files cmake-utils +%files -n llvm-cmake-utils %license LICENSE.TXT -%{_datadir}/llvm/cmake +%{_datadir}/llvm/ -%files test +%files -n llvm-test %license LICENSE.TXT %{_bindir}/not %{_bindir}/count @@ -246,62 +1697,422 @@ LD_LIBRARY_PATH=%{buildroot}/%{_libdir} %{__ninja} check-all -C %{_vpath_buildd %{_bindir}/lli-child-target %{_bindir}/llvm-isel-fuzzer %{_bindir}/llvm-opt-fuzzer + +%files -n llvm-googletest +%license LICENSE.TXT %{_libdir}/libLLVMTestingSupport.a %{_libdir}/libLLVMTestingAnnotations.a %{_libdir}/libllvm_gtest.a %{_libdir}/libllvm_gtest_main.a %{_includedir}/llvm-gtest %{_includedir}/llvm-gmock +##endregion LLVM files -%changelog -* Mon Jun 23 2025 chenli - 17.0.6-10 -- LoongArch Backport: Pass OptLevel to LoongArchDAGToDAGISel -* Thu Sep 26 2024 OpenCloudOS Release Engineering - 17.0.6-9 -- Rebuilt for clarifying the packages requirement in BaseOS and AppStream +##region CLANG files +%files -n clang +%license LICENSE.TXT +%{_bindir}/clang +%{_bindir}/clang++ +%{_bindir}/clang-%{maj_ver} +%{_bindir}/clang++-%{maj_ver} +%{_bindir}/clang-cl +%{_bindir}/clang-cpp +%{_mandir}/man1/clang.1 +%{_mandir}/man1/clang++.1 + +%files -n clang-libs +%license LICENSE.TXT +%{_libdir}/clang/%{maj_ver}/include/* +%{_libdir}/libclang-cpp.so.* +%{_libdir}/libclang.so.* + +%files -n clang-devel +%license LICENSE.TXT +%{_bindir}/clang-tblgen +%{_libdir}/libclang.so +%{_libdir}/libclang-cpp.so +%{_includedir}/clang/ +%{_includedir}/clang-c/ 
+%{_libdir}/cmake/clang/ +%dir %{_datadir}/clang/ + +%files -n clang-resource-filesystem +%license LICENSE.TXT +%dir %{_libdir}/clang/ + +%files -n clang-analyzer +%license LICENSE.TXT +%{_bindir}/scan-view +%{_bindir}/scan-build +%{_bindir}/analyze-build +%{_bindir}/intercept-build +%{_bindir}/scan-build-py +%{_libexecdir}/ccc-analyzer +%{_libexecdir}/c++-analyzer +%{_libexecdir}/analyze-c++ +%{_libexecdir}/analyze-cc +%{_libexecdir}/intercept-c++ +%{_libexecdir}/intercept-cc +%{_datadir}/scan-view/ +%{_datadir}/scan-build/ +%{_mandir}/man1/scan-build.1 +%{python3_sitelib}/libear/ +%{python3_sitelib}/libscanbuild/ + +%files -n clang-tools-extra +%license LICENSE.TXT +%{_bindir}/amdgpu-arch +%{_bindir}/clang-apply-replacements +%{_bindir}/clang-change-namespace +%{_bindir}/clang-check +%{_bindir}/clang-doc +%{_bindir}/clang-extdef-mapping +%{_bindir}/clang-format +%{_bindir}/clang-include-cleaner +%{_bindir}/clang-include-fixer +%{_bindir}/clang-move +%{_bindir}/clang-offload-bundler +%{_bindir}/clang-offload-packager +%{_bindir}/clang-linker-wrapper +%{_bindir}/clang-pseudo +%{_bindir}/clang-query +%{_bindir}/clang-refactor +%{_bindir}/clang-rename +%{_bindir}/clang-reorder-fields +%{_bindir}/clang-repl +%{_bindir}/clang-scan-deps +%{_bindir}/clang-tidy +%{_bindir}/clangd +%{_bindir}/diagtool +%{_bindir}/hmaptool +%{_bindir}/nvptx-arch +%{_bindir}/pp-trace +%{_bindir}/c-index-test +%{_bindir}/find-all-symbols +%{_bindir}/modularize +%{_bindir}/clang-format-diff +%{_mandir}/man1/diagtool.1 +%{_emacs_sitestartdir}/clang-format.el +%{_emacs_sitestartdir}/clang-include-fixer.el +%{_emacs_sitestartdir}/clang-rename.el +%{_datadir}/clang/clang-format.py* +%{_datadir}/clang/clang-format-diff.py* +%{_datadir}/clang/clang-include-fixer.py* +%{_datadir}/clang/clang-tidy-diff.py* +%{_bindir}/run-clang-tidy +%{_datadir}/clang/run-find-all-symbols.py* +%{_datadir}/clang/clang-rename.py* -* Tue Sep 24 2024 zhanglimin - 17.0.6-8 -- [LoongArch] Backport some new support +%files -n git-clang-format +%license LICENSE.TXT +%{_bindir}/git-clang-format -* Thu Sep 12 2024 doupengda - 17.0.6-7 -- [Type] other -- [DESC] Clear instructions not recorded in ErasedInstrs +%files -n python3-clang +%license LICENSE.TXT +%{python3_sitelib}/clang/ +##endregion CLANG files -* Mon Sep 02 2024 Zhao Zhen - 17.0.6-6 -- Deprecated python3-recommonmark from builddep -* Fri Aug 16 2024 OpenCloudOS Release Engineering - 17.0.6-5 -- Rebuilt for loongarch release +##region COMPILER-RT files +%files -n compiler-rt +%license compiler-rt/LICENSE.TXT +%ifarch x86_64 aarch64 riscv64 +%{_bindir}/hwasan_symbolize +%endif +%{_includedir}/fuzzer/ +%{_includedir}/orc/ +%{_includedir}/profile/ +%{_includedir}/sanitizer/ +%{_includedir}/xray/ +%{_libdir}/clang/%{maj_ver}/share/*.txt +%{_libdir}/clang/%{maj_ver}/lib/ -* Tue May 28 2024 zhanglimin - 17.0.6-4 -- Fix the issue for patching files. -* Thu May 16 2024 zhanglimin - 17.0.6-3 -- Support `relax`, vector, some improvements and bugfixs on LoongArch. +##region OPENMP files +%files -n libomp +%license openmp/LICENSE.TXT +%{_libdir}/libomp.so +%{_libdir}/libompd.so +%{_libdir}/libarcher.so +%ifnarch loongarch64 +%{_libdir}/libomptarget.rtl.amdgpu.so.%{maj_ver} +%{_libdir}/libomptarget.rtl.cuda.so.%{maj_ver} +%{_libdir}/libomptarget.rtl.%{_arch}.so.%{maj_ver} +%endif +%{_libdir}/libomptarget.so.%{maj_ver} -* Mon Apr 01 2024 rockerzhu - 17.0.6-2 -- Fix CVE-2023-46049. 
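+# The OpenMP headers below are packaged from the clang resource directory so
+# that clang -fopenmp finds omp.h without any extra include paths.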
+%files -n libomp-devel +%license openmp/LICENSE.TXT +%{_libdir}/clang/%{maj_ver}/include/omp.h +%{_libdir}/cmake/openmp/ +%{_libdir}/clang/%{maj_ver}/include/omp-tools.h +%{_libdir}/clang/%{maj_ver}/include/ompt.h +%{_libdir}/clang/%{maj_ver}/include/ompt-multiplex.h +%ifnarch loongarch64 +%{_libdir}/libomptarget.rtl.amdgpu.so +%{_libdir}/libomptarget.rtl.cuda.so +%{_libdir}/libomptarget.rtl.%{_arch}.so +%endif +%{_libdir}/libomptarget.devicertl.a +%{_libdir}/libomptarget.so +##endregion OPENMP files -* Mon Dec 18 2023 luffyluo - 17.0.6-1 -- Upgrade to version 17.0.6 -* Fri Sep 08 2023 OpenCloudOS Release Engineering - 16.0.6-2 -- Rebuilt for OpenCloudOS Stream 23.09 +##region LLD files +%files -n lld +%license LICENSE.TXT +%ghost %{_bindir}/ld +%{_bindir}/ld.lld +%{_bindir}/ld64.lld +%{_bindir}/lld +%{_bindir}/lld-link +%{_bindir}/wasm-ld +%{_mandir}/man1/ld.lld.1 + +%files -n lld-devel +%license LICENSE.TXT +%{_libdir}/liblldCOFF.so +%{_libdir}/liblldCommon.so +%{_libdir}/liblldELF.so +%{_libdir}/liblldMachO.so +%{_libdir}/liblldMinGW.so +%{_libdir}/liblldWasm.so +%{_includedir}/lld/ +%{_libdir}/cmake/lld/ -* Fri Jun 30 2023 kianli - 16.0.6-1 -- Upgrade to 16.0.6 +%files -n lld-libs +%license LICENSE.TXT +%{_libdir}/liblldCOFF.so.%{maj_ver} +%{_libdir}/liblldCommon.so.%{maj_ver} +%{_libdir}/liblldELF.so.%{maj_ver} +%{_libdir}/liblldMachO.so.%{maj_ver} +%{_libdir}/liblldMinGW.so.%{maj_ver} +%{_libdir}/liblldWasm.so.%{maj_ver} -* Fri Apr 28 2023 OpenCloudOS Release Engineering - 14.0.5-4 -- Rebuilt for OpenCloudOS Stream 23.05 +%files -n lld-test +%license LICENSE.TXT +%{_libexecdir}/tests/lld/ +%{_libdir}/lld/ +%{_datadir}/lld/src/test.tar.gz +%{_datadir}/lld/src/%{_arch}.site.cfg.py +%{_datadir}/lld/lit.lld-test.cfg.py +##endregion LLD files -* Fri Mar 31 2023 OpenCloudOS Release Engineering - 14.0.5-3 -- Rebuilt for OpenCloudOS Stream 23 -* Thu Jan 05 2023 rockerzhu - 14.0.5-2 -- Disable INSTANTIATE_TEST_SUITE_P on aarch64 due to failing in compiling. 
+##region LLDB files +%if %{with lldb} +%files -n lldb +%license LICENSE.TXT +%{_bindir}/lldb +%{_bindir}/lldb-argdumper +%{_bindir}/lldb-instr +%{_bindir}/lldb-server +%{_bindir}/lldb-vscode +%{_libdir}/liblldb.so.* +%{_libdir}/liblldbIntelFeatures.so.* +%{_mandir}/man1/lldb.1 +%{_mandir}/man1/lldb-server.1 -* Wed Jun 15 2022 metrechen - 14.0.5-1 -- Release tllvm, based on LLVM 14.0.5 +%files -n lldb-devel +%license LICENSE.TXT +%{_includedir}/lldb/ +%{_libdir}/liblldb.so +%{_libdir}/liblldbIntelFeatures.so -* Thu Jun 02 2022 rockerzhu - 14.0.0-1 -- Initial build +%files -n python3-lldb +%license LICENSE.TXT +%{python3_sitearch}/lldb/ +%endif +##endregion LLDB files + + +##region MLIR files +%if %{with mlir} +%files -n mlir +%license LICENSE.TXT +%{_libdir}/libMLIR-C.so.* +%{_libdir}/libMLIR.so.* +%{_libdir}/libmlir_async_runtime.so.* +%{_libdir}/libmlir_c_runner_utils.so.* +%{_libdir}/libmlir_float16_utils.so.* +%{_libdir}/libmlir_runner_utils.so.* + +%files -n mlir-static +%license LICENSE.TXT +%{_libdir}/libMLIR*.a + +%files -n mlir-devel +%license LICENSE.TXT +%{_bindir}/mlir-cpu-runner +%{_bindir}/mlir-linalg-ods-yaml-gen +%{_bindir}/mlir-lsp-server +%{_bindir}/mlir-opt +%{_bindir}/mlir-pdll +%{_bindir}/mlir-pdll-lsp-server +%{_bindir}/mlir-reduce +%{_bindir}/mlir-tblgen +%{_bindir}/mlir-translate +%{_bindir}/tblgen-lsp-server +%{_libdir}/libMLIR-C.so +%{_libdir}/libMLIR.so +%{_libdir}/libmlir_async_runtime.so +%{_libdir}/libmlir_c_runner_utils.so +%{_libdir}/libmlir_float16_utils.so +%{_libdir}/libmlir_runner_utils.so +%{_includedir}/mlir +%{_includedir}/mlir-c +%{_libdir}/cmake/mlir + +%files -n python3-mlir +%license LICENSE.TXT +%{python3_sitearch}/mlir/ +%endif +##endregion MLIR files + + +%if %{with flang} +##region flang files +%files -n flang +%license flang/LICENSE.TXT +%{_bindir}/tco +%{_bindir}/bbc +%{_bindir}/flang-to-external-fc +%{_bindir}/fir-opt +%{_bindir}/flang-new +%{_libdir}/libFortranLower.so.%{maj_ver}* +%{_libdir}/libFortranSemantics.so.%{maj_ver}* +%{_libdir}/libFortranCommon.so.%{maj_ver}* +%{_libdir}/libFortranRuntime.so.%{maj_ver}* +%{_libdir}/libFortranDecimal.so.%{maj_ver}* +%{_libdir}/libFortranEvaluate.so.%{maj_ver}* +%{_libdir}/libFortranParser.so.%{maj_ver}* +%{_libdir}/libflangFrontend.so.%{maj_ver}* +%{_libdir}/libflangFrontendTool.so.%{maj_ver}* +%{_libdir}/libFIRAnalysis.so.%{maj_ver} +%{_libdir}/libFIRBuilder.so.%{maj_ver} +%{_libdir}/libFIRCodeGen.so.%{maj_ver} +%{_libdir}/libFIRDialect.so.%{maj_ver} +%{_libdir}/libFIRSupport.so.%{maj_ver} +%{_libdir}/libFIRTestAnalysis.so.%{maj_ver} +%{_libdir}/libFIRTransforms.so.%{maj_ver} +%{_libdir}/libHLFIRDialect.so.%{maj_ver} +%{_libdir}/libHLFIRTransforms.so.%{maj_ver} +%{_libdir}/libFIRDialectSupport.so.%{maj_ver} + +%files -n flang-devel +%license flang/LICENSE.TXT +%{_libdir}/libFortranLower.so +%{_libdir}/libFortranParser.so +%{_libdir}/libFortranCommon.so +%{_libdir}/libFortranSemantics.so +%{_libdir}/libFortran_main.a +%{_libdir}/libFIRAnalysis.so +%{_libdir}/libFIRBuilder.so +%{_libdir}/libFIRCodeGen.so +%{_libdir}/libFIRDialect.so +%{_libdir}/libFIRSupport.so +%{_libdir}/libFIRTestAnalysis.so +%{_libdir}/libFIRTransforms.so +%{_libdir}/libFortranDecimal.so +%{_libdir}/libFortranRuntime.so +%{_libdir}/libFortranEvaluate.so +%{_libdir}/libflangFrontend.so +%{_libdir}/libflangFrontendTool.so +%{_libdir}/libHLFIRDialect.so +%{_libdir}/libHLFIRTransforms.so +%{_libdir}/libFIRDialectSupport.so +%{_includedir}/flang +%{_libdir}/cmake/flang/ +%endif +##endregion flang files + + +##region libcxx files 
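+# libc++, libc++abi and llvm-libunwind come from the runtimes build configured
+# above; only the library and header layout is split across subpackages here.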
+%if %{with libcxx} +%files -n libcxx +%license LICENSE.TXT +%doc libcxx/CREDITS.TXT libcxx/TODO.TXT +%{_libdir}/libc++.so.* + +%files -n libcxx-devel +%license LICENSE.TXT +%{_includedir}/c++/ +%exclude %{_includedir}/c++/v1/cxxabi.h +%exclude %{_includedir}/c++/v1/__cxxabi_config.h +%{_libdir}/libc++.so + +%files -n libcxx-static +%license LICENSE.TXT +%{_libdir}/libc++.a +%{_libdir}/libc++experimental.a + +%files -n libcxxabi +%license libcxxabi/LICENSE.TXT +%doc libcxxabi/CREDITS.TXT +%{_libdir}/libc++abi.so.* + +%files -n libcxxabi-devel +%license LICENSE.TXT +%{_includedir}/c++/v1/cxxabi.h +%{_includedir}/c++/v1/__cxxabi_config.h +%{_libdir}/libc++abi.so + +%files -n libcxxabi-static +%license LICENSE.TXT +%{_libdir}/libc++abi.a + +%files -n llvm-libunwind +%license libunwind/LICENSE.TXT +%{_libdir}/libunwind.so.1 +%{_libdir}/libunwind.so.1.0 + +%files -n llvm-libunwind-devel +%{_includedir}/llvm-libunwind/ +%dir %{_libdir}/llvm-unwind +%{_libdir}/llvm-unwind/libunwind.so + +%files -n llvm-libunwind-static +%{_libdir}/libunwind.a +%endif +##endregion libcxx files + + +##region BOLT files +%if %{with build_bolt} +%files -n llvm-bolt +%license LICENSE.TXT +%{_bindir}/llvm-bolt +%{_bindir}/llvm-boltdiff +%{_bindir}/llvm-bolt-heatmap +%{_bindir}/merge-fdata +%{_bindir}/perf2bolt +%{_libdir}/libbolt_rt_hugify.a +%{_libdir}/libbolt_rt_instr.a +%endif +##endregion BOLT files + + +##region polly files +%if %{with polly} +%files -n polly +%license LICENSE.TXT +%{_libdir}/LLVMPolly.so +%{_libdir}/libPolly.so.* +%{_libdir}/libPollyISL.so +%{_mandir}/man1/polly.1 + +%files -n polly-devel +%license LICENSE.TXT +%{_libdir}/libPolly.so +%{_includedir}/polly/ +%{_libdir}/cmake/polly/ +%endif +##endregion polly files +##endregion files + + + +%changelog +* Thu Jul 24 2025 Zhao Zhen - 17.0.6-11 +- From seperated components building to all-in-one llvm-projects building. \ No newline at end of file diff --git a/run-lit-tests b/run-lit-tests new file mode 100644 index 0000000..1a448d0 --- /dev/null +++ b/run-lit-tests @@ -0,0 +1,66 @@ +#!/bin/bash + +usage() { + cat << EOF +usage: `basename $0` [OPTIONS] + --threads NUM The number of threads to use for running tests. + --multilib-arch ARCH Use this option to test 32-bit libs/binaries on + 64-bit hosts. +EOF +} + +threads_arg='' + +while [ $# -gt 0 ]; do + case $1 in + --threads) + shift + threads_arg="--threads $1" + ;; + --multilib-arch) + shift + ARCH=$1 + ;; + * ) + echo "unknown option: $1" + echo "" + usage + exit 1 + ;; + esac + shift +done + +if [ `whoami` = "root" ]; then + echo "error: lld tests do not support running as root." 
+ exit 1 +fi + +set -xe + +if [ -z "$ARCH" ]; then + ARCH=`rpm --eval '%_arch'` +fi + +case $ARCH in + arm) + ;& + i686) + LIB_DIR="/usr/lib/" + ;; + *) + LIB_DIR="/usr/lib64/" + ;; +esac + +cd $(mktemp -d -p /var/tmp) +ln -s /usr/include include +tar -xzf /usr/share/lld/src/test.tar.gz +ln -s /usr/share/lld/src/$ARCH.site.cfg.py test/lit.site.cfg.py +ln -s /usr/share/lld/src/$ARCH.Unit.site.cfg.py test/Unit/lit.site.cfg.py + +LD_LIBRARY_PATH=$LIB_DIR/lld:$LD_LIBRARY_PATH \ +lit -v -s $threads_arg test \ + -Dlld_obj_root=`pwd` \ + -Dlld_test_root=`pwd`/test \ + -Dlld_unittest_bindir=$LIB_DIR/lld diff --git a/sources b/sources index 38802c5..5f06550 100644 --- a/sources +++ b/sources @@ -1,3 +1 @@ -SHA512 (llvm-17.0.6.src.tar.xz) = bf9b04d0d45c67168b195c550cd8326e3a01176f92776705846aad3956a494bcb7a053b0b0bde19abd68dc0068e5c97ef99dee7eadfdb727bc0d758b2684f3bd -SHA512 (cmake-17.0.6.src.tar.xz) = b2c5e404ca36542d44e1a7f2801bbcecbcf5f1e8c63b793bb2308228483406bdfe002720aadb913c0228cd2bbe5998465eaadc4a49fad6a5eb6ff907fa5fd45a -SHA512 (third-party-17.0.6.src.tar.xz) = 242dada4800c5e558f5f243e6aa0905d90ca3f82cc81baf14c60de543a7e737d4c2f3471122f2c641dc4f0724e4ebf5cf137761a231b34aab2a12f1cfc902c53 +SHA512 (llvm-project-17.0.6.src.tar.xz) = 6d85bf749e0d77553cc215cbfa61cec4ac4f4f652847f56f946b6a892a99a5ea40b6ab8b39a9708a035001f007986941ccf17e4635260a8b0c1fa59e78d41e30 -- Gitee From fc7b0b57feea6f68c67ad439d0c5e39f7947ea72 Mon Sep 17 00:00:00 2001 From: shouhuanxiaoji <345865759@163.com> Date: Tue, 12 Aug 2025 14:17:32 +0800 Subject: [PATCH 2/6] small fix --- llvm.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm.spec b/llvm.spec index c711ae6..0928782 100644 --- a/llvm.spec +++ b/llvm.spec @@ -270,7 +270,7 @@ BuildRequires: python3-recommonmark BuildRequires: python3-sphinx BuildRequires: python3-psutil BuildRequires: python3-pexpect -BuildRequires: python3-myst-parser +#BuildRequires: python3-myst-parser %if %{with gold} BuildRequires: binutils-devel BuildRequires: binutils-gold -- Gitee From 6846b88fc4da4a741b5dd8d7f453b3a844108bb3 Mon Sep 17 00:00:00 2001 From: shouhuanxiaoji <345865759@163.com> Date: Wed, 13 Aug 2025 18:56:29 +0800 Subject: [PATCH 3/6] disabled lto --- llvm.spec | 157 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 81 insertions(+), 76 deletions(-) diff --git a/llvm.spec b/llvm.spec index 0928782..6609c57 100644 --- a/llvm.spec +++ b/llvm.spec @@ -4,7 +4,6 @@ %global patch_ver 6 ##region components -%bcond_without check %bcond_without python_lit %bcond_without lldb %bcond_without mlir @@ -20,15 +19,9 @@ %endif # llvm <18 does not support building polly shared libraries when building libLLVM.so %bcond_with polly - ##endregion components - -# Disable LTO on x86 and riscv in order to reduce memory consumption. 
-%ifarch riscv64 +%bcond_without check %bcond_with lto_build -%else -%bcond_without lto_build -%endif %if 0%{without lto_build} %global _lto_cflags %nil @@ -980,6 +973,15 @@ popd ##region libcxx options %if %{with libcxx} +%global extra_runtimes_cmake_args \\\ + -DLIBCXX_ENABLE_LTO=OFF \\\ + -DLIBCXXABI_ENABLE_LTO=OFF \\\ + -DLIBUNWIND_ENABLE_LTO=OFF \\\ + -DCMAKE_C_FLAGS="-fno-lto" \\\ + -DCMAKE_CXX_FLAGS="-fno-lto" \\\ + -DCMAKE_EXE_LINKER_FLAGS="-fno-lto" \\\ + -DCMAKE_SHARED_LINKER_FLAGS="-fno-lto" \\\ + -DCMAKE_STATIC_LINKER_FLAGS="-fno-lto" %global cmake_config_args %{cmake_config_args} \\\ -DLIBCXX_ENABLE_LTO=OFF \\\ -DLIBCXXABI_ENABLE_LTO=OFF \\\ @@ -1049,8 +1051,8 @@ popd ##region flang options ##endregion mlir options %if %{with flang} -%global cmake_config_args %{cmake_config_args} \\\ - -DFLANG_INCLUDE_DOCS=ON +%global cmake_config_args %{cmake_config_args} \\\ + -DFLANG_INCLUDE_DOCS=ON %endif ##endregion flang options @@ -1146,7 +1148,10 @@ cd $OLD_CWD %global extra_cmake_opts %{nil} -%cmake -G Ninja %{cmake_config_args} %{extra_cmake_opts} $extra_cmake_args +%cmake -G Ninja \ + %{cmake_config_args} \ + %{extra_cmake_opts} \ + ${extra_cmake_args} # Build libLLVM.so first. This will help reduce OOM. %cmake_build --target LLVM @@ -1475,8 +1480,8 @@ rm -f %{buildroot}%{_libdir}/*.bc rm -f %{buildroot}%{_libdir}/libbolt_rt_instr_osx.a # clean redundant manpages -rm -f %{buildroot}%{_mandir}/man1/*-%{maj_ver}.1 -rm -f %{buildroot}%{_mandir}/man1/extraclangtools.1 +rm -f %{buildroot}%{_mandir}/man1/*-%{maj_ver}.* +rm -f %{buildroot}%{_mandir}/man1/extraclangtools.* @@ -1601,61 +1606,61 @@ fi %{_bindir}/UnicodeNameMappingGenerator %{_bindir}/verify-uselistorder %{_bindir}/yaml2obj -%{_mandir}/man1/bugpoint* -%{_mandir}/man1/clang-tblgen* +%{_mandir}/man1/bugpoint.* +%{_mandir}/man1/clang-tblgen.* %{_mandir}/man1/dsymutil* -%{_mandir}/man1/FileCheck* -%{_mandir}/man1/lit* -%{_mandir}/man1/llc* -%{_mandir}/man1/lldb-tblgen* -%{_mandir}/man1/lli* -%{_mandir}/man1/llvm-addr2line* -%{_mandir}/man1/llvm-ar* -%{_mandir}/man1/llvm-as* -%{_mandir}/man1/llvm-bcanalyzer* -%{_mandir}/man1/llvm-cov* -%{_mandir}/man1/llvm-cxxfilt* -%{_mandir}/man1/llvm-cxxmap* -%{_mandir}/man1/llvm-debuginfo-analyzer* -%{_mandir}/man1/llvm-diff* -%{_mandir}/man1/llvm-dis* -%{_mandir}/man1/llvm-dwarfdump* -%{_mandir}/man1/llvm-dwarfutil* -%{_mandir}/man1/llvm-exegesis* -%{_mandir}/man1/llvm-extract* -%{_mandir}/man1/llvm-ifs* -%{_mandir}/man1/llvm-install-name-tool* -%{_mandir}/man1/llvm-lib.1 -%{_mandir}/man1/llvm-libtool-darwin.1 -%{_mandir}/man1/llvm-link* -%{_mandir}/man1/llvm-lipo* -%{_mandir}/man1/llvm-locstats* -%{_mandir}/man1/llvm-mc.1 -%{_mandir}/man1/llvm-mca.1 -%{_mandir}/man1/llvm-nm* -%{_mandir}/man1/llvm-objcopy* -%{_mandir}/man1/llvm-objdump* -%{_mandir}/man1/llvm-opt-report* -%{_mandir}/man1/llvm-otool* -%{_mandir}/man1/llvm-pdbutil* -%{_mandir}/man1/llvm-profdata* -%{_mandir}/man1/llvm-profgen* -%{_mandir}/man1/llvm-ranlib* -%{_mandir}/man1/llvm-readelf* -%{_mandir}/man1/llvm-readobj* -%{_mandir}/man1/llvm-reduce* -%{_mandir}/man1/llvm-remarkutil* -%{_mandir}/man1/llvm-remark-size-diff* -%{_mandir}/man1/llvm-size* -%{_mandir}/man1/llvm-stress* -%{_mandir}/man1/llvm-strings* -%{_mandir}/man1/llvm-strip* -%{_mandir}/man1/llvm-symbolizer* -%{_mandir}/man1/llvm-tblgen* -%{_mandir}/man1/llvm-tli-checker* -%{_mandir}/man1/mlir-tblgen* -%{_mandir}/man1/opt* -%{_mandir}/man1/tblgen* +%{_mandir}/man1/FileCheck.* +%{_mandir}/man1/lit.* +%{_mandir}/man1/llc.* +%{_mandir}/man1/lldb-tblgen.* 
+%{_mandir}/man1/lli.* +%{_mandir}/man1/llvm-addr2line.* +%{_mandir}/man1/llvm-ar.* +%{_mandir}/man1/llvm-as.* +%{_mandir}/man1/llvm-bcanalyzer.* +%{_mandir}/man1/llvm-cov.* +%{_mandir}/man1/llvm-cxxfilt.* +%{_mandir}/man1/llvm-cxxmap.* +%{_mandir}/man1/llvm-debuginfo-analyzer.* +%{_mandir}/man1/llvm-diff.* +%{_mandir}/man1/llvm-dis.* +%{_mandir}/man1/llvm-dwarfdump.* +%{_mandir}/man1/llvm-dwarfutil.* +%{_mandir}/man1/llvm-exegesis.* +%{_mandir}/man1/llvm-extract.* +%{_mandir}/man1/llvm-ifs.* +%{_mandir}/man1/llvm-install-name-tool.* +%{_mandir}/man1/llvm-lib.* +%{_mandir}/man1/llvm-libtool-darwin.* +%{_mandir}/man1/llvm-link.* +%{_mandir}/man1/llvm-lipo.* +%{_mandir}/man1/llvm-locstats.* +%{_mandir}/man1/llvm-mc.* +%{_mandir}/man1/llvm-mca.* +%{_mandir}/man1/llvm-nm.* +%{_mandir}/man1/llvm-objcopy.* +%{_mandir}/man1/llvm-objdump.* +%{_mandir}/man1/llvm-opt-report.* +%{_mandir}/man1/llvm-otool.* +%{_mandir}/man1/llvm-pdbutil.* +%{_mandir}/man1/llvm-profdata.* +%{_mandir}/man1/llvm-profgen.* +%{_mandir}/man1/llvm-ranlib.* +%{_mandir}/man1/llvm-readelf.* +%{_mandir}/man1/llvm-readobj.* +%{_mandir}/man1/llvm-reduce.* +%{_mandir}/man1/llvm-remarkutil.* +%{_mandir}/man1/llvm-remark-size-diff.* +%{_mandir}/man1/llvm-size.* +%{_mandir}/man1/llvm-stress.* +%{_mandir}/man1/llvm-strings.* +%{_mandir}/man1/llvm-strip.* +%{_mandir}/man1/llvm-symbolizer.* +%{_mandir}/man1/llvm-tblgen.* +%{_mandir}/man1/llvm-tli-checker.* +%{_mandir}/man1/mlir-tblgen.* +%{_mandir}/man1/opt.* +%{_mandir}/man1/tblgen.* %{_datadir}/opt-viewer/ %files -n llvm-libs @@ -1675,7 +1680,7 @@ fi %{_bindir}/llvm-config-%{maj_ver} %{_libdir}/libLLVM.so %{_libdir}/cmake/llvm/ -%{_mandir}/man1/llvm-config* +%{_mandir}/man1/llvm-config.* %{_includedir}/llvm/ %{_includedir}/llvm-c/ @@ -1718,8 +1723,8 @@ fi %{_bindir}/clang++-%{maj_ver} %{_bindir}/clang-cl %{_bindir}/clang-cpp -%{_mandir}/man1/clang.1 -%{_mandir}/man1/clang++.1 +%{_mandir}/man1/clang.* +%{_mandir}/man1/clang++.* %files -n clang-libs %license LICENSE.TXT @@ -1756,7 +1761,7 @@ fi %{_libexecdir}/intercept-cc %{_datadir}/scan-view/ %{_datadir}/scan-build/ -%{_mandir}/man1/scan-build.1 +%{_mandir}/man1/scan-build.* %{python3_sitelib}/libear/ %{python3_sitelib}/libscanbuild/ @@ -1792,7 +1797,7 @@ fi %{_bindir}/find-all-symbols %{_bindir}/modularize %{_bindir}/clang-format-diff -%{_mandir}/man1/diagtool.1 +%{_mandir}/man1/diagtool.* %{_emacs_sitestartdir}/clang-format.el %{_emacs_sitestartdir}/clang-include-fixer.el %{_emacs_sitestartdir}/clang-rename.el @@ -1868,7 +1873,7 @@ fi %{_bindir}/lld %{_bindir}/lld-link %{_bindir}/wasm-ld -%{_mandir}/man1/ld.lld.1 +%{_mandir}/man1/ld.lld.* %files -n lld-devel %license LICENSE.TXT @@ -1911,8 +1916,8 @@ fi %{_bindir}/lldb-vscode %{_libdir}/liblldb.so.* %{_libdir}/liblldbIntelFeatures.so.* -%{_mandir}/man1/lldb.1 -%{_mandir}/man1/lldb-server.1 +%{_mandir}/man1/lldb.* +%{_mandir}/man1/lldb-server.* %files -n lldb-devel %license LICENSE.TXT @@ -2100,7 +2105,7 @@ fi %{_libdir}/LLVMPolly.so %{_libdir}/libPolly.so.* %{_libdir}/libPollyISL.so -%{_mandir}/man1/polly.1 +%{_mandir}/man1/polly.* %files -n polly-devel %license LICENSE.TXT -- Gitee From 20d58244e2a2785206f7c1b135263ae32e1b296c Mon Sep 17 00:00:00 2001 From: shouhuanxiaoji <345865759@163.com> Date: Thu, 14 Aug 2025 15:06:16 +0800 Subject: [PATCH 4/6] fixed aarch64 building --- llvm.spec | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm.spec b/llvm.spec index 6609c57..5d2f58f 100644 --- a/llvm.spec +++ b/llvm.spec @@ -2092,9 +2092,11 @@ fi %{_bindir}/llvm-bolt-heatmap 
%{_bindir}/merge-fdata %{_bindir}/perf2bolt +%ifarch x86_64 %{_libdir}/libbolt_rt_hugify.a %{_libdir}/libbolt_rt_instr.a %endif +%endif ##endregion BOLT files -- Gitee From c9a1b294d11580dbedcf4451aa4254899eef0515 Mon Sep 17 00:00:00 2001 From: shouhuanxiaoji <345865759@163.com> Date: Thu, 14 Aug 2025 16:16:20 +0800 Subject: [PATCH 5/6] removed unpackaged file --- llvm.spec | 3 --- 1 file changed, 3 deletions(-) diff --git a/llvm.spec b/llvm.spec index 5d2f58f..294cdfd 100644 --- a/llvm.spec +++ b/llvm.spec @@ -1492,9 +1492,6 @@ cd llvm %cmake_test || true %endif -# Do this here instead of in install so the check targets are also included. -cp %{_vpath_builddir}/.ninja_log %{buildroot}%{_datadir} - %post -n lld -- Gitee From d42295fe9173363d14accbeb3914a7ab89f2c125 Mon Sep 17 00:00:00 2001 From: jeremiazhao Date: Thu, 14 Aug 2025 12:51:39 +0000 Subject: [PATCH 6/6] fixed installing error Signed-off-by: jeremiazhao --- llvm.spec | 37 ++++++++++++------------------------- 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/llvm.spec b/llvm.spec index 294cdfd..fe73860 100644 --- a/llvm.spec +++ b/llvm.spec @@ -344,28 +344,13 @@ lit is a tool used by the LLVM project for executing its test suites. ##region LLVM packages -%package -n llvm-filesystem -Summary: Filesystem package that owns the versioned llvm prefix - -%description -n llvm-filesystem -This packages owns the versioned llvm prefix directory: $libdir/llvm$version - %package -n llvm-devel Summary: Libraries and header files for LLVM -Requires: llvm = %{version}-%{release} -Requires: llvm-libs = %{version}-%{release} -# The installed LLVM cmake files will add -ledit to the linker flags for any -# app that requires the libLLVMLineEditor, so we need to make sure -# libedit-devel is available. -%if %{with libedit} +Requires: %{name} = %{version}-%{release} +Requires: %{name}-libs = %{version}-%{release} +# for -ledit to the linker flags Requires: libedit-devel -%endif -Requires: libzstd-devel -Requires: llvm-static = %{version}-%{release} -Requires: llvm-test = %{version}-%{release} -Requires: llvm-googletest = %{version}-%{release} -Requires(post): alternatives -Requires(postun): alternatives +Requires: %{name}-test = %{version}-%{release} Provides: llvm-devel(major) = %{maj_ver} %description -n llvm-devel @@ -374,16 +359,16 @@ programs that use the LLVM infrastructure. %package -n llvm-libs Summary: LLVM shared libraries -Requires: llvm-filesystem = %{version}-%{release} %description -n llvm-libs Shared libraries for the LLVM compiler infrastructure. %package -n llvm-static Summary: LLVM static libraries -Requires: llvm-filesystem = %{version}-%{release} +Requires: %{name} = %{version}-%{release} +Requires: %{name}-libs = %{version}-%{release} Provides: llvm-static(major) = %{maj_ver} - + %description -n llvm-static Static libraries for the LLVM compiler infrastructure. @@ -404,7 +389,8 @@ Provides: llvm-test(major) = %{maj_ver} LLVM regression tests. %package -n llvm-googletest -Requires: llvm-filesystem = %{version}-%{release} +Requires: llvm = %{version}-%{release} +Requires: llvm-libs = %{version}-%{release} Summary: LLVM's modified googletest sources %description -n llvm-googletest @@ -760,8 +746,9 @@ Static library for LLVM libunwind. 
Summary: A post-link optimizer developed to speed up large applications License: Apache-2.0 WITH LLVM-exception URL: https://github.com/llvm/llvm-project/tree/main/bolt -Requires: llvm-filesystem = %{version}-%{release} -Recommends: gperftools-devel +Requires: llvm = %{version}-%{release} +Requires: llvm-libs = %{version}-%{release} +Recommends: gperftools-devel %description -n llvm-bolt -- Gitee
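
For reference, a minimal usage sketch of the run-lit-tests helper introduced in PATCH 1/6. The option names and the root check come from the script itself; this series does not show where the script is installed, so running it from its own directory is an assumption, and it further assumes the packaged test tarball and site configs exist under /usr/share/lld/src/ as the script expects.

    # Run the packaged lld lit tests with four worker threads (must not be root).
    ./run-lit-tests --threads 4

    # On a 64-bit host, test the 32-bit libraries/binaries instead.
    ./run-lit-tests --multilib-arch i686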